diff --git a/dev-tools/idea/solr/core/src/test/solr-core-tests.iml b/dev-tools/idea/solr/core/src/test/solr-core-tests.iml index 0ece1b59d5e..e94749de580 100644 --- a/dev-tools/idea/solr/core/src/test/solr-core-tests.iml +++ b/dev-tools/idea/solr/core/src/test/solr-core-tests.iml @@ -15,6 +15,7 @@ + @@ -29,5 +30,7 @@ + + diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 85408c5d9e5..79911beb6c6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -55,6 +55,10 @@ Documentation * LUCENE-5392: Add/improve analysis package documentation to reflect analysis API changes. (Benson Margulies via Robert Muir - pull request #17) +Other + +* LUCENE-5563: Removed sep layout: which has fallen behind on features and doesn't + perform as well as other options. (Robert Muir) ======================= Lucene 4.8.0 ======================= @@ -135,6 +139,16 @@ New Features resort the hits from a first pass search using a Sort or an Expression. (Simon Willnauer, Robert Muir, Mike McCandless) +* LUCENE-5558: Add TruncateTokenFilter which truncates terms to + the specified length. (Ahmet Arslan via Robert Muir) + +* LUCENE-2446: Added checksums to lucene index files. As of 4.8, the last 8 + bytes of each file contain a zlib-crc32 checksum. Small metadata files are + verified on load. Larger files can be checked on demand via + AtomicReader.checkIntegrity. You can configure this to happen automatically + before merges by enabling IndexWriterConfig.setCheckIntegrityAtMerge. + (Robert Muir) + API Changes * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues @@ -210,8 +224,18 @@ Bug fixes * LUCENE-5111: Fix WordDelimiterFilter to return offsets in correct order. (Robert Muir) +* LUCENE-5555: Fix SortedInputIterator to correctly encode/decode contexts in presence of payload (Areek Zillur) + +* LUCENE-5559: Add missing argument checks to tokenfilters taking + numeric arguments. (Ahmet Arslan via Robert Muir) + +* LUCENE-5568: Benchmark module's "default.codec" option didn't work. (David Smiley) + Test Framework +* LUCENE-5567: When a suite fails with zombie threads failure marker and count + is not propagated properly. (Dawid Weiss) + * LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _. 
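For the LUCENE-2446 entry above, a minimal sketch of how the new verification hooks can be exercised from application code (assuming the Lucene 4.8 APIs named in that entry, IndexWriterConfig.setCheckIntegrityAtMerge and AtomicReader.checkIntegrity; the example class and method names are illustrative only):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;

    public class ChecksumUsageSketch {
      // Verify segment checksums before they are merged.
      static IndexWriterConfig configWithMergeChecks() {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48,
            new StandardAnalyzer(Version.LUCENE_48));
        iwc.setCheckIntegrityAtMerge(true);
        return iwc;
      }

      // On-demand check of the larger files of every segment in an open index.
      static void verifyOnDemand(Directory dir) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          for (AtomicReaderContext leaf : reader.leaves()) {
            leaf.reader().checkIntegrity(); // throws CorruptIndexException on mismatch
          }
        }
      }
    }

Small metadata files are verified when the index is opened; the loop above covers the larger per-segment files that are only checked on demand.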
* LUCENE-5501: Added random out-of-order collection testing (when the collector diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java index 81d13389369..cddd3920c24 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.br; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -64,7 +65,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(BrazilianAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), "#", Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java index e67136be594..8a89ae5a978 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ckb; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -61,7 +62,7 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(SoraniAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java index 6530f34de70..b54739be60e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; import java.io.*; +import java.nio.charset.StandardCharsets; /** * {@link Analyzer} for Czech language. 
@@ -60,7 +61,7 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), "#", Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java index 4da371222cc..00f7520af9f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.da; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -63,7 +64,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java index 11b9064a752..6cab61ea1f4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.de; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -68,7 +69,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java index ea423c6807d..2ce1965af61 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.es; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -62,7 +63,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), 
Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java index 66d69374ba9..5f824429772 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.fi; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -63,7 +64,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java index 0605b0ceac0..b86fb80cb86 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.Version; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Arrays; /** @@ -79,7 +80,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java index f5e4cc402d1..a40276ff6de 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.gl; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -61,7 +62,7 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(GalicianAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java index 327f37fc9de..d2addb81747 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hu; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -63,7 +64,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java index 01af90f3c4d..bcf3be55303 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java @@ -50,6 +50,7 @@ import java.io.OutputStream; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; @@ -672,7 +673,7 @@ public class Dictionary { int flagSep = line.lastIndexOf(FLAG_SEPARATOR); if (flagSep == -1) { CharSequence cleansed = cleanInput(line, sb); - writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8)); + writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8)); } else { String text = line.substring(0, flagSep); CharSequence cleansed = cleanInput(text, sb); @@ -681,10 +682,10 @@ public class Dictionary { sb.append(cleansed); } sb.append(line.substring(flagSep)); - writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8)); + writer.write(sb.toString().getBytes(StandardCharsets.UTF_8)); } } else { - writer.write(line.getBytes(IOUtils.CHARSET_UTF_8)); + writer.write(line.getBytes(StandardCharsets.UTF_8)); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ISO8859_14Decoder.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ISO8859_14Decoder.java index 2d87947ab3d..e1f6e00f722 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ISO8859_14Decoder.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ISO8859_14Decoder.java @@ -21,8 +21,7 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; - -import org.apache.lucene.util.IOUtils; +import java.nio.charset.StandardCharsets; // many hunspell dictionaries use this encoding, yet java does not have it?!?! 
final class ISO8859_14Decoder extends CharsetDecoder { @@ -43,7 +42,7 @@ final class ISO8859_14Decoder extends CharsetDecoder { }; ISO8859_14Decoder() { - super(IOUtils.CHARSET_UTF_8, 1f, 1f); + super(StandardCharsets.ISO_8859_1 /* fake with similar properties */, 1f, 1f); } @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java index a4651e6bb2f..382bfaef9c8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java @@ -19,13 +19,13 @@ package org.apache.lucene.analysis.it; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; -import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -72,7 +72,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java index 58d1d360401..c6b80ed756b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.lv; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -61,7 +62,7 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(LatvianAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java index 0860689f5a9..f35afc68b6f 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java @@ -32,7 +32,7 @@ public final class LengthFilter extends FilteringTokenFilter { private final int min; private final int max; - + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** @@ -46,6 +46,12 @@ public final class 
LengthFilter extends FilteringTokenFilter { */ public LengthFilter(Version version, TokenStream in, int min, int max) { super(version, in); + if (min < 0) { + throw new IllegalArgumentException("minimum length must be greater than or equal to zero"); + } + if (min > max) { + throw new IllegalArgumentException("maximum length must not be greater than minimum length"); + } this.min = min; this.max = max; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java index 0c70a672ca3..aa301dc0b89 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java @@ -61,6 +61,9 @@ public final class LimitTokenCountFilter extends TokenFilter { */ public LimitTokenCountFilter(TokenStream in, int maxTokenCount, boolean consumeAllTokens) { super(in); + if (maxTokenCount < 1) { + throw new IllegalArgumentException("maxTokenCount must be greater than zero"); + } this.maxTokenCount = maxTokenCount; this.consumeAllTokens = consumeAllTokens; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java index ac714a20a15..d1596a5076e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java @@ -67,6 +67,9 @@ public final class LimitTokenPositionFilter extends TokenFilter { */ public LimitTokenPositionFilter(TokenStream in, int maxTokenPosition, boolean consumeAllTokens) { super(in); + if (maxTokenPosition < 1) { + throw new IllegalArgumentException("maxTokenPosition must be greater than zero"); + } this.maxTokenPosition = maxTokenPosition; this.consumeAllTokens = consumeAllTokens; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java new file mode 100644 index 00000000000..63e4cc08665 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilter.java @@ -0,0 +1,58 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +import java.io.IOException; + +/** + * A token filter for truncating the terms into a specific length. + * Fixed prefix truncation, as a stemming method, produces good results on Turkish language. + * It is reported that F5, using first 5 characters, produced best results in + * + * Information Retrieval on Turkish Texts + */ +public final class TruncateTokenFilter extends TokenFilter { + + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + private final int length; + + public TruncateTokenFilter(TokenStream input, int length) { + super(input); + if (length < 1) + throw new IllegalArgumentException("length parameter must be a positive number: " + length); + this.length = length; + } + + @Override + public final boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword() && termAttribute.length() > length) + termAttribute.setLength(length); + return true; + } else { + return false; + } + } +} \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterFactory.java new file mode 100644 index 00000000000..af3e40c93ca --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterFactory.java @@ -0,0 +1,59 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +import java.util.Map; + +/** + * Factory for {@link org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter}. The following type is recommended for "diacritics-insensitive search" for Turkish. + *
+ * <fieldType name="text_tr_ascii_f5" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.ApostropheFilterFactory"/>
+ *     <filter class="solr.TurkishLowerCaseFilterFactory"/>
+ *     <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
+ *     <filter class="solr.KeywordRepeatFilterFactory"/>
+ *     <filter class="solr.TruncateTokenFilterFactory" prefixLength="5"/>
+ *     <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
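 + *
 + * <p>The <code>prefixLength</code> parameter (default "5") sets how many leading characters are kept;
 + * tokens flagged via <code>KeywordAttribute</code> (here produced by <code>KeywordRepeatFilterFactory</code>)
 + * are passed through untruncated, so the original form is preserved alongside the truncated one.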
+ */ +public class TruncateTokenFilterFactory extends TokenFilterFactory { + + public static final String PREFIX_LENGTH_KEY = "prefixLength"; + private final byte prefixLength; + + public TruncateTokenFilterFactory(Map args) { + super(args); + prefixLength = Byte.parseByte(get(args, PREFIX_LENGTH_KEY, "5")); + if (prefixLength < 1) + throw new IllegalArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength); + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameter(s): " + args); + } + } + + @Override + public TokenStream create(TokenStream input) { + return new TruncateTokenFilter(input, prefixLength); + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java index 3904919139d..1f29184429d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java @@ -31,16 +31,14 @@ import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.analysis.util.CharacterUtils; import org.apache.lucene.analysis.util.WordlistLoader; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; -import org.apache.lucene.util.fst.FST; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; /** * {@link Analyzer} for Dutch language. @@ -75,7 +73,7 @@ public final class DutchAnalyzer extends Analyzer { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java index 968c5393b8a..ffe519947d0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.no; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -63,7 +64,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java index 
cc5939cfd8e..721ecbffd0a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.payloads; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.apache.lucene.util.BytesRef; @@ -28,7 +29,7 @@ import org.apache.lucene.util.BytesRef; * **/ public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{ - protected Charset charset = Charset.forName("UTF-8"); + protected Charset charset = StandardCharsets.UTF_8; public IdentityEncoder() { } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java index b21535aeb7d..e513f600906 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java @@ -45,8 +45,8 @@ public class TypeAsPayloadTokenFilter extends TokenFilter { public final boolean incrementToken() throws IOException { if (input.incrementToken()) { String type = typeAtt.type(); - if (type != null && type.equals("") == false) { - payloadAtt.setPayload(new BytesRef(type.getBytes("UTF-8"))); + if (type != null && !type.isEmpty()) { + payloadAtt.setPayload(new BytesRef(type)); } return true; } else { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java index 960ec53c3a0..3bceb5c6ab3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pt; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -62,7 +63,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java index f8da03451c3..f8ad153cfb4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.LineNumberReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -247,7 +248,7 @@ public abstract class RSLPStemmerBase { // TODO: this parser is ugly, but works. use a jflex grammar instead. 
try { InputStream is = clazz.getResourceAsStream(resource); - LineNumberReader r = new LineNumberReader(new InputStreamReader(is, "UTF-8")); + LineNumberReader r = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8)); Map steps = new HashMap<>(); String step; while ((step = readLine(r)) != null) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java index 156dcb92b7b..69ab96fa679 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ru; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -53,7 +54,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java index ed2e30d3ede..8ec5ba350e5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java @@ -31,6 +31,12 @@ public class TokenRangeSinkFilter extends TeeSinkTokenFilter.SinkFilter { private int count; public TokenRangeSinkFilter(int lower, int upper) { + if (lower < 1) { + throw new IllegalArgumentException("lower must be greater than zero"); + } + if (lower > upper) { + throw new IllegalArgumentException("lower must not be greater than upper"); + } this.lower = lower; this.upper = upper; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java index 5adbfa8467d..463ed2bc174 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java @@ -84,6 +84,9 @@ public final class ClassicTokenizer extends Tokenizer { /** Set the max allowed token length. Any token longer * than this is skipped. 
*/ public void setMaxTokenLength(int length) { + if (length < 1) { + throw new IllegalArgumentException("maxTokenLength must be greater than zero"); + } this.maxTokenLength = length; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index a4df17638e6..e269dfeb8cc 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -98,6 +98,9 @@ public final class StandardTokenizer extends Tokenizer { /** Set the max allowed token length. Any token longer * than this is skipped. */ public void setMaxTokenLength(int length) { + if (length < 1) { + throw new IllegalArgumentException("maxTokenLength must be greater than zero"); + } this.maxTokenLength = length; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index 83ce32fd1ac..8934bcf98ec 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -84,6 +84,9 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { /** Set the max allowed token length. Any token longer * than this is skipped. */ public void setMaxTokenLength(int length) { + if (length < 1) { + throw new IllegalArgumentException("maxTokenLength must be greater than zero"); + } this.maxTokenLength = length; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java index c4e24127806..a8878ea2139 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.sv; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -63,7 +64,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java index c06b247815c..7fcbf471c56 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java @@ -24,6 +24,7 @@ import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.HashMap; 
import java.util.Iterator; @@ -157,8 +158,8 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource /** * Load synonyms with the given {@link SynonymMap.Parser} class. */ - private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { - CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder() + protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException { + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java index 4ad1473ed84..5234440d0ac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java @@ -27,6 +27,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -252,7 +253,7 @@ public abstract class AbstractAnalysisFactory { * Returns the resource's lines (with content treated as UTF-8) */ protected final List getLines(ResourceLoader loader, String resource) throws IOException { - return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8); + return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8); } /** same as {@link #getWordSet(ResourceLoader, String, boolean)}, @@ -272,7 +273,7 @@ public abstract class AbstractAnalysisFactory { Reader reader = null; try { stream = loader.openResource(file.trim()); - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); reader = new InputStreamReader(stream, decoder); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java index 9d8890e2ea0..b98c33588c5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util; import java.io.File; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.IOUtils; @@ -97,7 +98,7 @@ public abstract class StopwordAnalyzerBase extends Analyzer { final String comment) throws IOException { Reader reader = null; try { - reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8); + reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8); return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase)); } finally { IOUtils.close(reader); @@ -122,7 +123,7 @@ public abstract class StopwordAnalyzerBase 
extends Analyzer { Version matchVersion) throws IOException { Reader reader = null; try { - reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8); + reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8); return WordlistLoader.getWordSet(reader, matchVersion); } finally { IOUtils.close(reader); diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory index 64ceb73bd02..9ac5e6a2ea1 100644 --- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory +++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory @@ -69,6 +69,7 @@ org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilterFactory org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory org.apache.lucene.analysis.miscellaneous.TrimFilterFactory +org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java index 85e4d69119f..93cfd5e2194 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java @@ -23,6 +23,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -78,7 +79,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase { //Some sanity checks, but not a full-fledged check public void testHTML() throws Exception { InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html"); - HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8")); + HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8)); StringBuilder builder = new StringBuilder(); int ch = -1; while ((ch = reader.read()) != -1){ @@ -95,7 +96,7 @@ public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase { public void testMSWord14GeneratedHTML() throws Exception { InputStream stream = getClass().getResourceAsStream("MS-Word 14 generated.htm"); - HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8")); + HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8)); String gold = "This is a test"; StringBuilder builder = new StringBuilder(); int ch = 0; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java index b433d308f83..1d924e8ff87 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -269,7 +270,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { String luceneResourcesWikiPage; try { reader = new InputStreamReader(getClass().getResourceAsStream - ("LuceneResourcesWikiPage.html"), "UTF-8"); + ("LuceneResourcesWikiPage.html"), StandardCharsets.UTF_8); StringBuilder builder = new StringBuilder(); char[] buffer = new char[1024]; int numCharsRead; @@ -289,7 +290,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { try { List urlList = new ArrayList<>(); bufferedReader = new BufferedReader(new InputStreamReader - (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), "UTF-8")); + (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), StandardCharsets.UTF_8)); String line; while (null != (line = bufferedReader.readLine())) { line = line.trim(); @@ -313,7 +314,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { String randomTextWithEmails; try { reader = new InputStreamReader(getClass().getResourceAsStream - ("random.text.with.email.addresses.txt"), "UTF-8"); + ("random.text.with.email.addresses.txt"), StandardCharsets.UTF_8); StringBuilder builder = new StringBuilder(); char[] buffer = new char[1024]; int numCharsRead; @@ -334,7 +335,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { List emailList = new ArrayList<>(); bufferedReader = new BufferedReader(new InputStreamReader (getClass().getResourceAsStream - ("email.addresses.from.random.text.with.email.addresses.txt"), "UTF-8")); + ("email.addresses.from.random.text.with.email.addresses.txt"), StandardCharsets.UTF_8)); String line; while (null != (line = bufferedReader.readLine())) { line = line.trim(); @@ -383,7 +384,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { String randomTextWithURLs; try { reader = new InputStreamReader(getClass().getResourceAsStream - ("random.text.with.urls.txt"), "UTF-8"); + ("random.text.with.urls.txt"), StandardCharsets.UTF_8); StringBuilder builder = new StringBuilder(); char[] buffer = new char[1024]; int numCharsRead; @@ -404,7 +405,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { List urlList = new ArrayList<>(); bufferedReader = new BufferedReader(new InputStreamReader (getClass().getResourceAsStream - ("urls.from.random.text.with.urls.txt"), "UTF-8")); + ("urls.from.random.text.with.urls.txt"), StandardCharsets.UTF_8)); String line; while (null != (line = bufferedReader.readLine())) { line = line.trim(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java index 578b0d22278..44db27424ca 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunspell; import java.io.File; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ 
-157,7 +158,7 @@ public class TestAllDictionaries extends LuceneTestCase { File f = new File(DICTIONARY_HOME, tests[i]); assert f.exists(); - try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) { + try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) { ZipEntry dicEntry = zip.getEntry(tests[i+1]); assert dicEntry != null; ZipEntry affEntry = zip.getEntry(tests[i+2]); @@ -186,7 +187,7 @@ public class TestAllDictionaries extends LuceneTestCase { File f = new File(DICTIONARY_HOME, tests[i]); assert f.exists(); - try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) { + try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) { ZipEntry dicEntry = zip.getEntry(tests[i+1]); assert dicEntry != null; ZipEntry affEntry = zip.getEntry(tests[i+2]); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java index 7a855f14777..68e9d4f0c04 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunspell; import java.io.File; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -173,7 +174,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { File f = new File(DICTIONARY_HOME, tests[i]); assert f.exists(); - try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) { + try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) { ZipEntry dicEntry = zip.getEntry(tests[i+1]); assert dicEntry != null; ZipEntry affEntry = zip.getEntry(tests[i+2]); @@ -202,7 +203,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { File f = new File(DICTIONARY_HOME, tests[i]); assert f.exists(); - try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) { + try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) { ZipEntry dicEntry = zip.getEntry(tests[i+1]); assert dicEntry != null; ZipEntry affEntry = zip.getEntry(tests[i+2]); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index 030181e89da..63ac534e8f0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import org.apache.lucene.util.BytesRef; @@ -232,10 +233,10 @@ public class TestDictionary extends LuceneTestCase { } public void testSetWithCrazyWhitespaceAndBOMs() throws Exception { - assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8)))); - assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8)))); - assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8)))); - assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new 
ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8)))); + assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(StandardCharsets.UTF_8)))); + assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(StandardCharsets.UTF_8)))); + assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(StandardCharsets.UTF_8)))); + assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(StandardCharsets.UTF_8)))); } public void testFlagWithCrazyWhitespace() throws Exception { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java index d8adb892280..0aa47149601 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.junit.Test; public class TestLengthFilter extends BaseTokenStreamTestCase { @@ -50,4 +51,11 @@ public class TestLengthFilter extends BaseTokenStreamTestCase { checkOneTerm(a, "", ""); } + /** + * checking the validity of constructor arguments + */ + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java index cc8e8845a22..40bbe1fb783 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java @@ -1,11 +1,12 @@ package org.apache.lucene.analysis.miscellaneous; -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -31,21 +32,36 @@ public class TestLengthFilterFactory extends BaseTokenStreamFactoryTestCase { TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); ((Tokenizer)stream).setReader(reader); stream = tokenFilterFactory("Length", - "min", "4", - "max", "10").create(stream); + LengthFilterFactory.MIN_KEY, "4", + LengthFilterFactory.MAX_KEY, "10").create(stream); assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 }); } - + /** Test that bogus arguments result in exception */ public void testBogusArguments() throws Exception { try { - tokenFilterFactory("Length", - "min", "4", - "max", "5", + tokenFilterFactory("Length", + LengthFilterFactory.MIN_KEY, "4", + LengthFilterFactory.MAX_KEY, "5", "bogusArg", "bogusValue"); fail(); } catch (IllegalArgumentException expected) { assertTrue(expected.getMessage().contains("Unknown parameters")); } } + + /** Test that invalid arguments result in exception */ + public void testInvalidArguments() throws Exception { + try { + Reader reader = new StringReader("foo foobar super-duper-trooper"); + TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); + ((Tokenizer)stream).setReader(reader); + tokenFilterFactory("Length", + LengthFilterFactory.MIN_KEY, "5", + LengthFilterFactory.MAX_KEY, "4").create(stream); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length")); + } + } } \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java new file mode 100644 index 00000000000..a62868c9fb7 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java @@ -0,0 +1,40 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.junit.Test; + +public class TestLimitTokenCountFilter extends BaseTokenStreamTestCase { + + public void test() throws Exception { + for (final boolean consumeAll : new boolean[]{true, false}) { + MockTokenizer tokenizer = whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6"); + tokenizer.setEnableChecks(consumeAll); + TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll); + assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"}); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new LimitTokenCountFilter(whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6"), -1); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java index 038fa303e85..a335e613170 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java @@ -1,11 +1,12 @@ package org.apache.lucene.analysis.miscellaneous; -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -16,25 +17,28 @@ package org.apache.lucene.analysis.miscellaneous; * limitations under the License. 
*/ -import java.io.Reader; -import java.io.StringReader; - import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase; +import java.io.Reader; +import java.io.StringReader; + public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase { public void test() throws Exception { - Reader reader = new StringReader("A1 B2 C3 D4 E5 F6"); - MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); - tokenizer.setReader(reader); - // LimitTokenCountFilter doesn't consume the entire stream that it wraps - tokenizer.setEnableChecks(false); - TokenStream stream = tokenizer; - stream = tokenFilterFactory("LimitTokenCount", - "maxTokenCount", "3").create(stream); - assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" }); + for (final boolean consumeAll : new boolean[]{true, false}) { + Reader reader = new StringReader("A1 B2 C3 D4 E5 F6"); + MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); + tokenizer.setReader(reader); + tokenizer.setEnableChecks(consumeAll); + TokenStream stream = tokenizer; + stream = tokenFilterFactory("LimitTokenCount", + LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3", + LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll) + ).create(stream); + assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"}); + } } public void testRequired() throws Exception { @@ -44,15 +48,17 @@ public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTest fail(); } catch (IllegalArgumentException e) { assertTrue("exception doesn't mention param: " + e.getMessage(), - 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY)); + 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY)); } } - - /** Test that bogus arguments result in exception */ + + /** + * Test that bogus arguments result in exception + */ public void testBogusArguments() throws Exception { try { - tokenFilterFactory("LimitTokenCount", - "maxTokenCount", "3", + tokenFilterFactory("LimitTokenCount", + LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3", "bogusArg", "bogusValue"); fail(); } catch (IllegalArgumentException expected) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java index 4285305b752..a3dbf8e0c62 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java @@ -16,10 +16,6 @@ package org.apache.lucene.analysis.miscellaneous; * limitations under the License. 
*/ -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; @@ -27,11 +23,15 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.synonym.SynonymFilter; import org.apache.lucene.analysis.synonym.SynonymMap; import org.apache.lucene.util.CharsRef; +import org.junit.Test; + +import java.io.IOException; +import java.io.StringReader; public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase { public void testMaxPosition2() throws IOException { - for (final boolean consumeAll : new boolean[] { true, false }) { + for (final boolean consumeAll : new boolean[]{true, false}) { Analyzer a = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { @@ -42,43 +42,50 @@ public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase { } }; - // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)! - assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"), - new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null); - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), - new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : null); + // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)! + assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"), + new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null); + assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), + new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null); // less than the limit, ensure we behave correctly assertTokenStreamContents(a.tokenStream("dummy", "1 "), - new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null); - + new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null); + // equal to limit - assertTokenStreamContents(a.tokenStream("dummy", "1 2 "), - new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null); + assertTokenStreamContents(a.tokenStream("dummy", "1 2 "), + new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 
6 : null); } } - + public void testMaxPosition3WithSynomyms() throws IOException { - MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five"); - tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire stream that it wraps - - SynonymMap.Builder builder = new SynonymMap.Builder(true); - builder.add(new CharsRef("one"), new CharsRef("first"), true); - builder.add(new CharsRef("one"), new CharsRef("alpha"), true); - builder.add(new CharsRef("one"), new CharsRef("beguine"), true); - CharsRef multiWordCharsRef = new CharsRef(); - SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, multiWordCharsRef); - builder.add(new CharsRef("one"), multiWordCharsRef, true); - SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef); - builder.add(new CharsRef("two"), multiWordCharsRef, true); - SynonymMap synonymMap = builder.build(); - TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true); - stream = new LimitTokenPositionFilter(stream, 3); // consumeAllTokens defaults to false - - // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3. - assertTokenStreamContents(stream, - new String[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" }, - new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 }); - + for (final boolean consumeAll : new boolean[]{true, false}) { + MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five"); + // if we are consuming all tokens, we can use the checks, otherwise we can't + tokenizer.setEnableChecks(consumeAll); + + SynonymMap.Builder builder = new SynonymMap.Builder(true); + builder.add(new CharsRef("one"), new CharsRef("first"), true); + builder.add(new CharsRef("one"), new CharsRef("alpha"), true); + builder.add(new CharsRef("one"), new CharsRef("beguine"), true); + CharsRef multiWordCharsRef = new CharsRef(); + SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef); + builder.add(new CharsRef("one"), multiWordCharsRef, true); + SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef); + builder.add(new CharsRef("two"), multiWordCharsRef, true); + SynonymMap synonymMap = builder.build(); + TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true); + stream = new LimitTokenPositionFilter(stream, 3, consumeAll); + + // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3. 
+ assertTokenStreamContents(stream, + new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"}, + new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new LimitTokenPositionFilter(whitespaceMockTokenizer("one two three four five"), 0); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java index d3b7c62479b..193b18e0855 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java @@ -16,26 +16,30 @@ package org.apache.lucene.analysis.miscellaneous; * limitations under the License. */ -import java.io.Reader; -import java.io.StringReader; - import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase; +import java.io.Reader; +import java.io.StringReader; + public class TestLimitTokenPositionFilterFactory extends BaseTokenStreamFactoryTestCase { public void testMaxPosition1() throws Exception { - Reader reader = new StringReader("A1 B2 C3 D4 E5 F6"); - MockTokenizer tokenizer = whitespaceMockTokenizer(reader); - // LimitTokenPositionFilter doesn't consume the entire stream that it wraps - tokenizer.setEnableChecks(false); - TokenStream stream = tokenizer; - stream = tokenFilterFactory("LimitTokenPosition", - "maxTokenPosition", "1").create(stream); - assertTokenStreamContents(stream, new String[] { "A1" }); + for (final boolean consumeAll : new boolean[]{true, false}) { + Reader reader = new StringReader("A1 B2 C3 D4 E5 F6"); + MockTokenizer tokenizer = whitespaceMockTokenizer(reader); + // if we are consuming all tokens, we can use the checks, otherwise we can't + tokenizer.setEnableChecks(consumeAll); + TokenStream stream = tokenizer; + stream = tokenFilterFactory("LimitTokenPosition", + LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1", + LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll) + ).create(stream); + assertTokenStreamContents(stream, new String[]{"A1"}); + } } - + public void testMissingParam() throws Exception { try { tokenFilterFactory("LimitTokenPosition"); @@ -47,34 +51,31 @@ public class TestLimitTokenPositionFilterFactory extends BaseTokenStreamFactoryT } public void testMaxPosition1WithShingles() throws Exception { - Reader reader = new StringReader("one two three four five"); - MockTokenizer tokenizer = whitespaceMockTokenizer(reader); - // LimitTokenPositionFilter doesn't consume the entire stream that it wraps - tokenizer.setEnableChecks(false); - TokenStream stream = tokenizer; - stream = tokenFilterFactory("Shingle", - "minShingleSize", "2", - "maxShingleSize", "3", - "outputUnigrams", "true").create(stream); - stream = tokenFilterFactory("LimitTokenPosition", - "maxTokenPosition", "1").create(stream); - assertTokenStreamContents(stream, new String[] { "one", "one two", "one two three" }); + for (final boolean consumeAll : new boolean[]{true, false}) { + Reader reader = new StringReader("one two three four five"); + MockTokenizer tokenizer = whitespaceMockTokenizer(reader); + // if 
we are consuming all tokens, we can use the checks, otherwise we can't + tokenizer.setEnableChecks(consumeAll); + TokenStream stream = tokenizer; + stream = tokenFilterFactory("Shingle", + "minShingleSize", "2", + "maxShingleSize", "3", + "outputUnigrams", "true").create(stream); + stream = tokenFilterFactory("LimitTokenPosition", + LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1", + LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll) + ).create(stream); + assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"}); + } } - - public void testConsumeAllTokens() throws Exception { - Reader reader = new StringReader("A1 B2 C3 D4 E5 F6"); - TokenStream stream = whitespaceMockTokenizer(reader); - stream = tokenFilterFactory("LimitTokenPosition", - "maxTokenPosition", "3", - "consumeAllTokens", "true").create(stream); - assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" }); - } - - /** Test that bogus arguments result in exception */ + + /** + * Test that bogus arguments result in exception + */ public void testBogusArguments() throws Exception { try { - tokenFilterFactory("LimitTokenPosition", - "maxTokenPosition", "3", + tokenFilterFactory("LimitTokenPosition", + "maxTokenPosition", "3", "bogusArg", "bogusValue"); fail(); } catch (IllegalArgumentException expected) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java new file mode 100644 index 00000000000..c705decbe47 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilter.java @@ -0,0 +1,39 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.junit.Test; + +/** + * Test the truncate token filter. 
+ */ +public class TestTruncateTokenFilter extends BaseTokenStreamTestCase { + + public void testTruncating() throws Exception { + TokenStream stream = whitespaceMockTokenizer("abcdefg 1234567 ABCDEFG abcde abc 12345 123"); + stream = new TruncateTokenFilter(stream, 5); + assertTokenStreamContents(stream, new String[]{"abcde", "12345", "ABCDE", "abcde", "abc", "12345", "123"}); + } + + @Test(expected = IllegalArgumentException.class) + public void testNonPositiveLength() throws Exception { + new TruncateTokenFilter(whitespaceMockTokenizer("length must be a positive number"), -48); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java new file mode 100644 index 00000000000..8b2d76527b8 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTruncateTokenFilterFactory.java @@ -0,0 +1,73 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase; + +import java.io.Reader; +import java.io.StringReader; + +/** + * Simple tests to ensure the simple truncation filter factory is working. + */ +public class TestTruncateTokenFilterFactory extends BaseTokenStreamFactoryTestCase { + /** + * Ensure the filter actually truncates text. 
+ */ + public void testTruncating() throws Exception { + Reader reader = new StringReader("abcdefg 1234567 ABCDEFG abcde abc 12345 123"); + TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false); + ((Tokenizer) stream).setReader(reader); + stream = tokenFilterFactory("Truncate", + TruncateTokenFilterFactory.PREFIX_LENGTH_KEY, "5").create(stream); + assertTokenStreamContents(stream, new String[]{"abcde", "12345", "ABCDE", "abcde", "abc", "12345", "123"}); + } + + /** + * Test that bogus arguments result in exception + */ + public void testBogusArguments() throws Exception { + try { + tokenFilterFactory("Truncate", + TruncateTokenFilterFactory.PREFIX_LENGTH_KEY, "5", + "bogusArg", "bogusValue"); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains("Unknown parameter(s):")); + } + } + + /** + * Test that negative prefix length result in exception + */ + public void testNonPositivePrefixLengthArgument() throws Exception { + try { + tokenFilterFactory("Truncate", + TruncateTokenFilterFactory.PREFIX_LENGTH_KEY, "-5" + ); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains(TruncateTokenFilterFactory.PREFIX_LENGTH_KEY + " parameter must be a positive number: -5")); + } + } +} + + diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java index 1f4efec3596..6dc5b3fd602 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java @@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import java.io.StringReader; +import java.nio.charset.StandardCharsets; public class DelimitedPayloadTokenFilterTest extends BaseTokenStreamTestCase { @@ -37,15 +38,15 @@ public class DelimitedPayloadTokenFilterTest extends BaseTokenStreamTestCase { PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); filter.reset(); assertTermEquals("The", filter, termAtt, payAtt, null); - assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); - assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); - assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8")); - assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8")); + assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes(StandardCharsets.UTF_8)); assertTermEquals("over", filter, termAtt, payAtt, null); assertTermEquals("the", filter, termAtt, payAtt, null); - assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); - assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); - assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8")); + assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("dogs", filter, termAtt, payAtt, 
"NN".getBytes(StandardCharsets.UTF_8)); assertFalse(filter.incrementToken()); filter.end(); filter.close(); @@ -59,15 +60,15 @@ public class DelimitedPayloadTokenFilterTest extends BaseTokenStreamTestCase { DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); filter.reset(); assertTermEquals("The", filter, null); - assertTermEquals("quick", filter, "JJ".getBytes("UTF-8")); - assertTermEquals("red", filter, "JJ".getBytes("UTF-8")); - assertTermEquals("fox", filter, "NN".getBytes("UTF-8")); - assertTermEquals("jumped", filter, "VB".getBytes("UTF-8")); + assertTermEquals("quick", filter, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("red", filter, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("fox", filter, "NN".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("jumped", filter, "VB".getBytes(StandardCharsets.UTF_8)); assertTermEquals("over", filter, null); assertTermEquals("the", filter, null); - assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8")); - assertTermEquals("brown", filter, "JJ".getBytes("UTF-8")); - assertTermEquals("dogs", filter, "NN".getBytes("UTF-8")); + assertTermEquals("lazy", filter, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("brown", filter, "JJ".getBytes(StandardCharsets.UTF_8)); + assertTermEquals("dogs", filter, "NN".getBytes(StandardCharsets.UTF_8)); assertFalse(filter.incrementToken()); filter.end(); filter.close(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java index 9f5442f2fe6..7239d7cc6a1 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; import java.io.StringReader; +import java.nio.charset.StandardCharsets; public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase { @@ -41,8 +42,8 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase { while (nptf.incrementToken()) { assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0])))); assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); - String type = new String(payloadAtt.getPayload().bytes, "UTF-8"); - assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true); + String type = payloadAtt.getPayload().utf8ToString(); + assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type())); count++; } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java index 939788af18a..b1acecd8a99 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java @@ -1,11 +1,12 @@ package org.apache.lucene.analysis.sinks; -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -21,6 +22,7 @@ import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; +import org.junit.Test; public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase { @@ -29,20 +31,25 @@ public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase { String test = "The quick red fox jumped over the lazy brown dogs"; TeeSinkTokenFilter tee = new TeeSinkTokenFilter(whitespaceMockTokenizer(test)); TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter); - + int count = 0; tee.reset(); while(tee.incrementToken()) { count++; } - + int sinkCount = 0; rangeToks.reset(); while (rangeToks.incrementToken()) { sinkCount++; } - + assertTrue(count + " does not equal: " + 10, count == 10); assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2); } + + @Test(expected = IllegalArgumentException.class) + public void testIllegalArguments() throws Exception { + new TokenRangeSinkFilter(4, 2); + } } \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java index 3f6f9b75bbf..e8eda12a15e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java @@ -172,4 +172,13 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes assertTrue(expected.getMessage().contains("Unknown parameters")); } } + + public void testIllegalArguments() throws Exception { + try { + tokenizerFactory("UAX29URLEmail", "maxTokenLength", "-1").create(); + fail(); + } catch (IllegalArgumentException expected) { + assertTrue(expected.getMessage().contains("maxTokenLength must be greater than zero")); + } + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java index 4a3ca489d61..6a8d1b58a35 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; /** Fake resource loader for tests: works if you want to fake reading a single file */ public class StringMockResourceLoader implements ResourceLoader { @@ -50,6 +51,6 @@ public class StringMockResourceLoader implements ResourceLoader { @Override public InputStream openResource(String resource) throws IOException { - return new 
ByteArrayInputStream(text.getBytes("UTF-8")); + return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java index d37acf104c8..3f3d61dc10a 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -49,7 +50,7 @@ public class TestFilesystemResourceLoader extends LuceneTestCase { private void assertClasspathDelegation(ResourceLoader rl) throws Exception { // try a stopwords file from classpath CharArraySet set = WordlistLoader.getSnowballWordSet( - new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), IOUtils.CHARSET_UTF_8), + new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8), TEST_VERSION_CURRENT ); assertTrue(set.contains("you")); @@ -64,7 +65,7 @@ public class TestFilesystemResourceLoader extends LuceneTestCase { final File base = TestUtil.createTempDir("fsResourceLoaderBase").getAbsoluteFile(); try { base.mkdirs(); - Writer os = new OutputStreamWriter(new FileOutputStream(new File(base, "template.txt")), IOUtils.CHARSET_UTF_8); + Writer os = new OutputStreamWriter(new FileOutputStream(new File(base, "template.txt")), StandardCharsets.UTF_8); try { os.write("foobar\n"); } finally { @@ -72,28 +73,28 @@ public class TestFilesystemResourceLoader extends LuceneTestCase { } ResourceLoader rl = new FilesystemResourceLoader(base); - assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), IOUtils.CHARSET_UTF_8).get(0)); + assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0)); // Same with full path name: String fullPath = new File(base, "template.txt").toString(); assertEquals("foobar", - WordlistLoader.getLines(rl.openResource(fullPath), IOUtils.CHARSET_UTF_8).get(0)); + WordlistLoader.getLines(rl.openResource(fullPath), StandardCharsets.UTF_8).get(0)); assertClasspathDelegation(rl); assertNotFound(rl); // now use RL without base dir: rl = new FilesystemResourceLoader(); assertEquals("foobar", - WordlistLoader.getLines(rl.openResource(new File(base, "template.txt").toString()), IOUtils.CHARSET_UTF_8).get(0)); + WordlistLoader.getLines(rl.openResource(new File(base, "template.txt").toString()), StandardCharsets.UTF_8).get(0)); assertClasspathDelegation(rl); assertNotFound(rl); } finally { - TestUtil.rmDir(base); + TestUtil.rm(base); } } public void testDelegation() throws Exception { ResourceLoader rl = new FilesystemResourceLoader(null, new StringMockResourceLoader("foobar\n")); - assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), IOUtils.CHARSET_UTF_8).get(0)); + assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0)); } } diff --git a/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java 
b/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java index 69b4b392f94..a96e41d77ce 100644 --- a/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java +++ b/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java @@ -25,6 +25,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URL; import java.net.URLConnection; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.util.Date; import java.util.Locale; @@ -118,7 +119,7 @@ public class GenerateJflexTLDMacros { connection.connect(); tldFileLastModified = connection.getLastModified(); BufferedReader reader = new BufferedReader - (new InputStreamReader(connection.getInputStream(), "US-ASCII")); + (new InputStreamReader(connection.getInputStream(), StandardCharsets.US_ASCII)); try { String line; while (null != (line = reader.readLine())) { @@ -150,7 +151,7 @@ public class GenerateJflexTLDMacros { (DateFormat.FULL, DateFormat.FULL, Locale.ROOT); dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); final Writer writer = new OutputStreamWriter - (new FileOutputStream(outputFile), "UTF-8"); + (new FileOutputStream(outputFile), StandardCharsets.UTF_8); try { writer.write(APACHE_LICENSE); writer.write("// Generated from IANA Root Zone Database <"); diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java index 307becc0cc9..12b5f824e18 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java @@ -20,7 +20,7 @@ package org.apache.lucene.analysis.icu.segmentation; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -132,7 +132,7 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoa StringBuilder rules = new StringBuilder(); InputStream rulesStream = loader.openResource(filename); BufferedReader reader = new BufferedReader - (IOUtils.getDecodingReader(rulesStream, IOUtils.CHARSET_UTF_8)); + (IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8)); String line = null; while ((line = reader.readLine()) != null) { if ( ! 
line.startsWith("#")) diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java index 7f1bdfe1269..2c194ff707d 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java @@ -35,6 +35,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URL; import java.net.URLConnection; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Locale; @@ -106,7 +107,7 @@ public class GenerateUTR30DataFiles { private static void expandDataFileRules(File file) throws IOException { final FileInputStream stream = new FileInputStream(file); - final InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); + final InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8); final BufferedReader bufferedReader = new BufferedReader(reader); StringBuilder builder = new StringBuilder(); String line; @@ -154,7 +155,7 @@ public class GenerateUTR30DataFiles { if (modified) { System.err.println("Expanding rules in and overwriting " + file.getName()); final FileOutputStream out = new FileOutputStream(file, false); - Writer writer = new OutputStreamWriter(out, "UTF-8"); + Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8); try { writer.write(builder.toString()); } finally { @@ -178,8 +179,8 @@ public class GenerateUTR30DataFiles { System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... "); URLConnection connection = openConnection(new URL(norm2url, NFC_TXT)); BufferedReader reader = new BufferedReader - (new InputStreamReader(connection.getInputStream(), "UTF-8")); - Writer writer = new OutputStreamWriter(new FileOutputStream(NFC_TXT), "UTF-8"); + (new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8)); + Writer writer = new OutputStreamWriter(new FileOutputStream(NFC_TXT), StandardCharsets.UTF_8); try { String line; diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java index 59f49903290..a4745d75e1f 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java @@ -25,6 +25,7 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import com.ibm.icu.text.RuleBasedBreakIterator; @@ -37,7 +38,7 @@ public class RBBIRuleCompiler { static String getRules(File ruleFile) throws IOException { StringBuilder rules = new StringBuilder(); InputStream in = new FileInputStream(ruleFile); - BufferedReader cin = new BufferedReader(new InputStreamReader(in, "UTF-8")); + BufferedReader cin = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); String line = null; while ((line = cin.readLine()) != null) { if (!line.startsWith("#")) diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java index 856f6268ad2..611b2025f81 100644 --- 
a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.ja; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.util.ResourceLoader; @@ -52,6 +53,6 @@ class StringMockResourceLoader implements ResourceLoader { @Override public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes("UTF-8")); + return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java index 3ca8a465a88..1dc39615bde 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -34,7 +35,6 @@ import org.apache.lucene.analysis.ja.dict.ConnectionCosts; import org.apache.lucene.analysis.ja.dict.UserDictionary; import org.apache.lucene.analysis.ja.tokenattributes.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.LuceneTestCase.Slow; @@ -49,7 +49,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { } try { try { - Reader reader = new InputStreamReader(is, IOUtils.CHARSET_UTF_8); + Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8); return new UserDictionary(reader); } finally { is.close(); @@ -571,7 +571,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { /* public void testWikipedia() throws Exception { final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml"); - final Reader r = new BufferedReader(new InputStreamReader(fis, "UTF-8")); + final Reader r = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); final long startTimeNS = System.nanoTime(); boolean done = false; @@ -618,7 +618,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { private void doTestBocchan(int numIterations) throws Exception { LineNumberReader reader = new LineNumberReader(new InputStreamReader( - this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8")); + this.getClass().getResourceAsStream("bocchan.utf-8"), StandardCharsets.UTF_8)); String line = reader.readLine(); reader.close(); diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java index 366fc5d7106..fa92e94a870 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java @@ -22,13 +22,12 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import 
java.io.LineNumberReader; -import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; -import org.apache.lucene.util.IOUtils; public class TestSearchMode extends BaseTokenStreamTestCase { private final static String SEGMENTATION_FILENAME = "search-segmentation-tests.txt"; @@ -47,7 +46,7 @@ public class TestSearchMode extends BaseTokenStreamTestCase { throw new FileNotFoundException("Cannot find " + SEGMENTATION_FILENAME + " in test classpath"); } try { - LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, IOUtils.CHARSET_UTF_8)); + LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line = null; while ((line = reader.readLine()) != null) { // Remove comments diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java index c6ee197c520..bdabdb6b8d8 100644 --- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java +++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java @@ -24,6 +24,7 @@ import java.io.LineNumberReader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; public class ConnectionCostsBuilder { @@ -32,7 +33,7 @@ public class ConnectionCostsBuilder { public static ConnectionCostsWriter build(String filename) throws IOException { FileInputStream inputStream = new FileInputStream(filename); - Charset cs = Charset.forName("US-ASCII"); + Charset cs = StandardCharsets.US_ASCII; CharsetDecoder decoder = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java index 591a907bb25..8198abd6294 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java @@ -21,10 +21,9 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Properties; -import org.apache.lucene.util.IOUtils; - /** * Manages analysis data configuration for SmartChineseAnalyzer *
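For context on the AnalyzerProfile hunk that follows: only the charset constant passed to the reader changes, but the reader wrapper itself is the part worth keeping in mind. Properties.load(InputStream) decodes the file as ISO-8859-1, so going through an InputStreamReader with an explicit UTF-8 charset is what keeps a non-ASCII analysis.data.dir value intact. A minimal sketch of that pattern, with an illustrative file name rather than anything from the patch:

import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

public class Utf8PropertiesSketch {
  public static void main(String[] args) throws Exception {
    Properties prop = new Properties();
    // Properties.load(InputStream) would decode the bytes as ISO-8859-1;
    // loading through a Reader applies the charset we actually want.
    try (Reader in = new InputStreamReader(
        new FileInputStream("analysis-data.properties"), StandardCharsets.UTF_8)) {
      prop.load(in);
    }
    System.out.println(prop.getProperty("analysis.data.dir", ""));
  }
}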

@@ -80,7 +79,7 @@ public class AnalyzerProfile { Properties prop = new Properties(); try { FileInputStream input = new FileInputStream(propFile); - prop.load(new InputStreamReader(input, IOUtils.CHARSET_UTF_8)); + prop.load(new InputStreamReader(input, StandardCharsets.UTF_8)); String dir = prop.getProperty("analysis.data.dir", ""); input.close(); return dir; diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java index 283743e1685..a3a5a8c3680 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java @@ -18,18 +18,16 @@ package org.apache.lucene.analysis.cn.smart; import java.io.IOException; -import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Set; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.WordlistLoader; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.cn.smart.SentenceTokenizer; -import org.apache.lucene.analysis.cn.smart.WordTokenFilter; -import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; @@ -90,7 +88,7 @@ public final class SmartChineseAnalyzer extends Analyzer { // make sure it is unmodifiable as we expose it in the outer class return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils .getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, - IOUtils.CHARSET_UTF_8), STOPWORD_FILE_COMMENT, + StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT, Version.LUCENE_CURRENT)); } } diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java index 39f1e72b0fd..9240fbb623a 100644 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pl; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -76,7 +77,7 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase { static { try { DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class, - DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), "#", Version.LUCENE_CURRENT); + DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT); } catch (IOException ex) { // default set should always be present as it is part of the // distribution (JAR) diff --git a/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java b/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java index 75d91688918..d06668967d8 100644 --- a/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java +++ 
b/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java @@ -65,10 +65,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.LineNumberReader; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.StringTokenizer; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -139,7 +139,7 @@ public class TestCompile extends LuceneTestCase { private static void assertTrie(Trie trie, String file, boolean usefull, boolean storeorig) throws Exception { LineNumberReader in = new LineNumberReader(new BufferedReader( - new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8))); + new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))); for (String line = in.readLine(); line != null; line = in.readLine()) { try { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java index 81ed4c2df0e..1ceb4d434e8 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java @@ -18,8 +18,8 @@ package org.apache.lucene.benchmark.byTask; */ import java.io.File; -import java.io.FileReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.benchmark.byTask.utils.Algorithm; import org.apache.lucene.benchmark.byTask.utils.Config; @@ -107,7 +107,7 @@ public class Benchmark { Benchmark benchmark = null; try { - benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8)); + benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, StandardCharsets.UTF_8)); } catch (Exception e) { e.printStackTrace(); System.exit(1); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java index 031a0f0b5bf..1357dce7305 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java @@ -18,7 +18,6 @@ package org.apache.lucene.benchmark.byTask.feeds; */ import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.util.IOUtils; import java.io.BufferedReader; import java.io.File; @@ -26,6 +25,7 @@ import java.io.FileFilter; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; @@ -206,7 +206,7 @@ public class DirContentSource extends ContentSource { name = f.getCanonicalPath()+"_"+iteration; } - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8)); String line = null; //First line is the date, 3rd is the title, rest is body String dateStr = reader.readLine(); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index 96768779817..45fb1a6a3fa 100644 --- 
a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.feeds; import java.io.Closeable; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.text.ParsePosition; import java.text.SimpleDateFormat; import java.util.Calendar; @@ -318,7 +319,7 @@ public class DocMaker implements Closeable { if (storeBytes) { Field bytesField = ds.getField(BYTES_FIELD, StringField.TYPE_STORED); - bytesField.setBytesValue(bdy.getBytes("UTF-8")); + bytesField.setBytesValue(bdy.getBytes(StandardCharsets.UTF_8)); doc.add(bytesField); } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java index 86a2efafb07..10b61515032 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java @@ -20,18 +20,15 @@ package org.apache.lucene.benchmark.byTask.feeds; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Locale; import java.util.Map; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; -import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.ThreadInterruptedException; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -182,10 +179,7 @@ public class EnwikiContentSource extends ContentSource { if (localFileIS != null) { // null means fileIS was closed on us try { // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader. 
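The EnwikiContentSource change just below swaps a hand-built CharsetDecoder for IOUtils.getDecodingReader. The point of the original decoder was strictness: CodingErrorAction.REPORT makes malformed bytes throw instead of being silently replaced with U+FFFD, which is what a plain InputStreamReader over a Charset would do, and Lucene's helper is presumably meant to preserve exactly that. A hedged sketch of such a strict reader, with an assumed helper name:

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

final class StrictUtf8Reader {
  // Returns a Reader that reports malformed or unmappable input as an exception
  // rather than substituting the replacement character.
  static Reader open(InputStream in) {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    return new InputStreamReader(in, decoder);
  }
}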
- CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - reader.parse(new InputSource(new BufferedReader(new InputStreamReader(localFileIS, decoder)))); + reader.parse(new InputSource(IOUtils.getDecodingReader(localFileIS, StandardCharsets.UTF_8))); } catch (IOException ioe) { synchronized(EnwikiContentSource.this) { if (localFileIS != is) { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java index d7e3378c048..0d92db19824 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java @@ -9,6 +9,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -62,12 +63,12 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake Reader reader = null; // note: we use a decoding reader, so if your queries are screwed up you know if (file.exists()) { - reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8); + reader = IOUtils.getDecodingReader(file, StandardCharsets.UTF_8); } else { //see if we can find it as a resource InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName); if (asStream != null) { - reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8); + reader = IOUtils.getDecodingReader(asStream, StandardCharsets.UTF_8); } } if (reader != null) { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java index a1737e232a5..ad00211b62a 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java @@ -29,6 +29,7 @@ import java.util.Properties; import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; +import org.apache.lucene.util.IOUtils; /** * A {@link ContentSource} reading one line at a time as a @@ -277,7 +278,7 @@ public class LineDocSource extends ContentSource { } file = new File(fileName).getAbsoluteFile(); if (encoding == null) { - encoding = "UTF-8"; + encoding = IOUtils.UTF_8; } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java index b3106af48b1..b4d58bc0b01 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; @@ -30,7 +31,6 @@ import java.util.Date; import java.util.Locale; import org.apache.lucene.benchmark.byTask.utils.Config; 
-import org.apache.lucene.util.IOUtils; /** * A {@link ContentSource} reading from the Reuters collection. @@ -114,7 +114,7 @@ public class ReutersContentSource extends ContentSource { name = f.getCanonicalPath() + "_" + iteration; } - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8)); try { // First line is the date, 3rd is the title, rest is body String dateStr = reader.readLine(); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java index 1942684b379..439fc594694 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; @@ -320,7 +321,7 @@ public class TrecContentSource extends ContentSource { } // encoding if (encoding == null) { - encoding = "ISO-8859-1"; + encoding = StandardCharsets.ISO_8859_1.name(); } // iteration exclusion in doc name excludeDocnameIteration = config.get("content.source.excludeIteration", false); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index 9d0ee64ee2b..054895db31d 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -20,19 +20,18 @@ package org.apache.lucene.benchmark.byTask.tasks; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LogMergePolicy; -import org.apache.lucene.index.TieredMergePolicy; -import org.apache.lucene.index.MergeScheduler; -import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeScheduler; import org.apache.lucene.index.NoDeletionPolicy; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.NoMergeScheduler; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.util.Version; import java.io.BufferedOutputStream; @@ -130,7 +129,7 @@ public class CreateIndexTask extends PerfTask { if (defaultCodec != null) { try { Class clazz = Class.forName(defaultCodec).asSubclass(Codec.class); - Codec.setDefault(clazz.newInstance()); + iwConf.setCodec(clazz.newInstance()); } catch (Exception e) { throw new RuntimeException("Couldn't instantiate Codec: " + defaultCodec, e); } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java 
b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java index 5c9b88f1411..334e52fdc35 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java @@ -5,6 +5,7 @@ import java.io.File; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; @@ -41,7 +42,7 @@ public class WriteEnwikiLineDocTask extends WriteLineDocTask { public WriteEnwikiLineDocTask(PerfRunData runData) throws Exception { super(runData); OutputStream out = StreamUtils.outputStream(categoriesLineFile(new File(fname))); - categoryLineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE)); + categoryLineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), StreamUtils.BUFFER_SIZE)); writeHeader(categoryLineFileOut); } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java index d0f1c5292ed..9715b35871c 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; import java.util.regex.Matcher; @@ -101,7 +102,7 @@ public class WriteLineDocTask extends PerfTask { throw new IllegalArgumentException("line.file.out must be set"); } OutputStream out = StreamUtils.outputStream(new File(fname)); - lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE)); + lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), StreamUtils.BUFFER_SIZE)); docMaker = runData.getDocMaker(); // init fields diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java index b3bcb5544b1..a683b42d0ed 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; @@ -53,7 +54,7 @@ public class QueryDriver { File topicsFile = new File(args[0]); File qrelsFile = new File(args[1]); - SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene"); + SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], IOUtils.UTF_8 /* huh, no nio.Charset ctor? */), "lucene"); FSDirectory dir = FSDirectory.open(new File(args[3])); String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified. 
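The inline "huh, no nio.Charset ctor?" remark in QueryDriver is accurate for Java 7: PrintWriter's file-name constructors only accept a charset name, which is why the string constant IOUtils.UTF_8 is passed here. If a Charset object were wanted instead, a hypothetical alternative would wrap the stream manually:

// Illustrative only, not what the patch does: build the writer over an
// OutputStreamWriter so a java.nio.charset.Charset can be passed directly.
PrintWriter submitOut = new PrintWriter(
    new OutputStreamWriter(new FileOutputStream(args[2]), StandardCharsets.UTF_8), true);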
IndexReader reader = DirectoryReader.open(dir); @@ -66,10 +67,10 @@ public class QueryDriver { // use trec utilities to read trec topics into quality queries TrecTopicsReader qReader = new TrecTopicsReader(); - QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8))); + QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, StandardCharsets.UTF_8))); // prepare judge, with trec utilities that read from a QRels file - Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8))); + Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, StandardCharsets.UTF_8))); // validate topics & judgments match each other judge.validateData(qqs, logger); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java index e0fe7a62e27..8e7f7e639b7 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java @@ -21,16 +21,13 @@ import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.lucene.util.IOUtils; - /** * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body @@ -78,7 +75,7 @@ public class ExtractReuters { */ protected void extractFile(File sgmFile) { try { - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), StandardCharsets.UTF_8)); StringBuilder buffer = new StringBuilder(1024); StringBuilder outBuffer = new StringBuilder(1024); @@ -112,7 +109,7 @@ public class ExtractReuters { File outFile = new File(outputDir, sgmFile.getName() + "-" + (docNumber++) + ".txt"); // System.out.println("Writing " + outFile); - OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8); + OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8); writer.write(out); writer.close(); outBuffer.setLength(0); diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java index 387a0ad284d..6bc02196aa4 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java @@ -22,6 +22,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.Properties; import org.apache.lucene.benchmark.byTask.feeds.ContentSource; @@ -30,7 +31,6 @@ import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource; import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException; import org.apache.lucene.benchmark.byTask.utils.Config; import 
org.apache.lucene.document.Document; -import org.apache.lucene.util.IOUtils; /** * Extract the downloaded Wikipedia dump into separate files for indexing. @@ -86,7 +86,7 @@ public class ExtractWikipedia { contents.append("\n"); try { - Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8); + Writer writer = new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8); writer.write(contents.toString()); writer.close(); } catch (IOException ioe) { diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index ed62ba3a0e7..0579601f0b2 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.Collator; import java.util.List; import java.util.Locale; @@ -406,7 +407,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { BufferedReader r = new BufferedReader( new InputStreamReader( - new FileInputStream(lineFile), "UTF-8")); + new FileInputStream(lineFile), StandardCharsets.UTF_8)); int numLines = 0; String line; while((line = r.readLine()) != null) { diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java index 77674bd4549..515c8f7b503 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java @@ -23,6 +23,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import org.apache.lucene.benchmark.byTask.feeds.AbstractQueryMaker; @@ -121,7 +122,7 @@ public class TestPerfTasksParse extends LuceneTestCase { public boolean accept(File pathname) { return pathname.isFile() && pathname.getName().endsWith(".alg"); } })) { try { - Config config = new Config(new InputStreamReader(new FileInputStream(algFile), "UTF-8")); + Config config = new Config(new InputStreamReader(new FileInputStream(algFile), StandardCharsets.UTF_8)); String contentSource = config.get("content.source", null); if (contentSource != null) { Class.forName(contentSource); } config.set("work.dir", TestUtil.createTempDir(LuceneTestCase.getTestClass().getSimpleName()).getAbsolutePath()); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java index 6a509e4fcad..dcff96f8e24 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.IOUtils; /** Tests the functionality of {@link DocMaker}. 
*/ public class DocMakerTest extends BenchmarkTestCase { @@ -166,7 +167,7 @@ public class DocMakerTest extends BenchmarkTestCase { // DocMaker did not close its ContentSource if resetInputs was called twice, // leading to a file handle leak. File f = new File(getWorkDir(), "docMakerLeak.txt"); - PrintStream ps = new PrintStream(f, "UTF-8"); + PrintStream ps = new PrintStream(f, IOUtils.UTF_8); ps.println("one title\t" + System.currentTimeMillis() + "\tsome content"); ps.close(); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java index 785cd4ffd76..b66b8e0d91f 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java @@ -18,15 +18,13 @@ package org.apache.lucene.benchmark.byTask.feeds; */ import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.Properties; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -43,7 +41,7 @@ public class EnwikiContentSourceTest extends LuceneTestCase { @Override protected InputStream openInputStream() throws IOException { - return new ByteArrayInputStream(docs.getBytes(IOUtils.CHARSET_UTF_8)); + return new ByteArrayInputStream(docs.getBytes(StandardCharsets.UTF_8)); } } diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java index d830d396d72..9b6bfd11b13 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java @@ -23,6 +23,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.Properties; import org.apache.commons.compress.compressors.CompressorStreamFactory; @@ -53,7 +54,7 @@ public class LineDocSourceTest extends BenchmarkTestCase { private void createBZ2LineFile(File file, boolean addHeader) throws Exception { OutputStream out = new FileOutputStream(file); out = csFactory.createCompressorOutputStream("bzip2", out); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); writeDocsToFile(writer, addHeader, null); writer.close(); } @@ -90,14 +91,14 @@ public class LineDocSourceTest extends BenchmarkTestCase { private void createRegularLineFile(File file, boolean addHeader) throws Exception { OutputStream out = new FileOutputStream(file); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); writeDocsToFile(writer, addHeader, null); writer.close(); } private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception { OutputStream out = new 
FileOutputStream(file); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); Properties p = new Properties(); for (String f : extraFields) { p.setProperty(f, f); @@ -209,7 +210,7 @@ public class LineDocSourceTest extends BenchmarkTestCase { for (int i = 0; i < testCases.length; i++) { File file = new File(getWorkDir(), "one-line"); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8")); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8)); writer.write(testCases[i]); writer.newLine(); writer.close(); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java index d129bbb7a37..d93fef2c7b0 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; @@ -73,7 +74,7 @@ public class WriteEnwikiLineDocTaskTest extends BenchmarkTestCase { private void doReadTest(int n, File file, String expTitle, String expDate, String expBody) throws Exception { InputStream in = new FileInputStream(file); - BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); try { String line = br.readLine(); WriteLineDocTaskTest.assertHeaderLine(line); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java index 47aa48213b6..093d52ec60c 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Properties; import java.util.Set; @@ -168,7 +169,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { default: assertFalse("Unknown file type!",true); //fail, should not happen } - BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); try { String line = br.readLine(); assertHeaderLine(line); @@ -274,7 +275,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { wldt.doLogic(); wldt.close(); - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); try { String line = br.readLine(); assertHeaderLine(line); @@ -292,7 +293,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { wldt.doLogic(); wldt.close(); - 
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); try { String line = br.readLine(); assertHeaderLine(line); @@ -310,7 +311,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { wldt.doLogic(); wldt.close(); - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); try { String line = br.readLine(); assertHeaderLine(line); @@ -345,7 +346,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { wldt.close(); Set ids = new HashSet<>(); - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); try { String line = br.readLine(); assertHeaderLine(line); // header line is written once, no matter how many threads there are diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java index 8c4c50ca94f..8a88e2049a9 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java @@ -26,10 +26,10 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.benchmark.BenchmarkTestCase; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; import org.junit.After; import org.junit.Before; @@ -87,7 +87,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { private File rawTextFile(String ext) throws Exception { File f = new File(testDir,"testfile." 
+ ext); - BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -116,7 +116,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { } private void writeText(OutputStream os) throws IOException { - BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -124,7 +124,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { private void assertReadText(File f) throws Exception { InputStream ir = StreamUtils.inputStream(f); - InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8); + InputStreamReader in = new InputStreamReader(ir, StandardCharsets.UTF_8); BufferedReader r = new BufferedReader(in); String line = r.readLine(); assertEquals("Wrong text found in "+f.getName(), TEXT, line); @@ -136,14 +136,14 @@ public class StreamUtilsTest extends BenchmarkTestCase { public void setUp() throws Exception { super.setUp(); testDir = new File(getWorkDir(),"ContentSourceTest"); - TestUtil.rmDir(testDir); + TestUtil.rm(testDir); assertTrue(testDir.mkdirs()); } @Override @After public void tearDown() throws Exception { - TestUtil.rmDir(testDir); + TestUtil.rm(testDir); super.tearDown(); } diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java index 09502a10140..64de265f588 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java @@ -34,6 +34,7 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; /** * Test that quality run does its job. 
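The benchmark and test edits above all apply the same substitution: charset names passed as strings ("UTF-8", "utf-8", "ISO-8859-1") or via the old IOUtils.CHARSET_UTF_8 constant become java.nio.charset.StandardCharsets constants. A minimal before/after sketch (class and method names are illustrative):

import java.io.*;
import java.nio.charset.StandardCharsets;

class CharsetMigrationSketch {
  // Before: charset looked up by name; the constructor declares a checked exception.
  static Writer byName(OutputStream out) throws UnsupportedEncodingException {
    return new OutputStreamWriter(out, "UTF-8");
  }
  // After: charset passed as a constant; no checked exception, no misspelled name.
  static Writer byConstant(OutputStream out) {
    return new OutputStreamWriter(out, StandardCharsets.UTF_8);
  }
}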
@@ -62,11 +63,11 @@ public class TestQualityRun extends BenchmarkTestCase { // prepare topics InputStream topics = getClass().getResourceAsStream("trecTopics.txt"); TrecTopicsReader qReader = new TrecTopicsReader(); - QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, "UTF-8"))); + QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, StandardCharsets.UTF_8))); // prepare judge InputStream qrels = getClass().getResourceAsStream("trecQRels.txt"); - Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, "UTF-8"))); + Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, StandardCharsets.UTF_8))); // validate topics & judgments match each other judge.validateData(qqs, logger); @@ -147,7 +148,7 @@ public class TestQualityRun extends BenchmarkTestCase { InputStream topicsFile = getClass().getResourceAsStream("trecTopics.txt"); TrecTopicsReader qReader = new TrecTopicsReader(); QualityQuery qqs[] = qReader.readQueries( - new BufferedReader(new InputStreamReader(topicsFile, "UTF-8"))); + new BufferedReader(new InputStreamReader(topicsFile, StandardCharsets.UTF_8))); assertEquals(20, qqs.length); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index 0a75f893b6f..9eebed604b6 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -177,7 +177,10 @@ public class BlockTermsReader extends FieldsProducer { } private void seekDir(IndexInput input, long dirOffset) throws IOException { - if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) { + if (version >= BlockTermsWriter.VERSION_CHECKSUM) { + input.seek(input.length() - CodecUtil.footerLength() - 8); + dirOffset = input.readLong(); + } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) { input.seek(input.length() - 8); dirOffset = input.readLong(); } @@ -863,4 +866,14 @@ public class BlockTermsReader extends FieldsProducer { sizeInBytes += (indexReader!=null) ? 
indexReader.ramBytesUsed() : 0; return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException { + // verify terms + if (version >= BlockTermsWriter.VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(in); + } + // verify postings + postingsReader.checkIntegrity(); + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java index 579cd02273a..d47ec99dbc0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java @@ -63,12 +63,13 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { public static final int VERSION_START = 0; public static final int VERSION_APPEND_ONLY = 1; public static final int VERSION_META_ARRAY = 2; - public static final int VERSION_CURRENT = VERSION_META_ARRAY; + public static final int VERSION_CHECKSUM = 3; + public static final int VERSION_CURRENT = VERSION_CHECKSUM; /** Extension of terms file */ static final String TERMS_EXTENSION = "tib"; - protected final IndexOutput out; + protected IndexOutput out; final PostingsWriterBase postingsWriter; final FieldInfos fieldInfos; FieldInfo currentField; @@ -176,26 +177,30 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { } public void close() throws IOException { - try { - final long dirStart = out.getFilePointer(); - - out.writeVInt(fields.size()); - for(FieldMetaData field : fields) { - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - out.writeVLong(field.termsStartPointer); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - out.writeVLong(field.sumTotalTermFreq); - } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); - if (VERSION_CURRENT >= VERSION_META_ARRAY) { - out.writeVInt(field.longsSize); + if (out != null) { + try { + final long dirStart = out.getFilePointer(); + + out.writeVInt(fields.size()); + for(FieldMetaData field : fields) { + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVLong(field.termsStartPointer); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); + } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); + if (VERSION_CURRENT >= VERSION_META_ARRAY) { + out.writeVInt(field.longsSize); + } } + writeTrailer(dirStart); + CodecUtil.writeFooter(out); + } finally { + IOUtils.close(out, postingsWriter, termsIndexWriter); + out = null; } - writeTrailer(dirStart); - } finally { - IOUtils.close(out, postingsWriter, termsIndexWriter); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java index b13966b34dc..d3a8ec57a3e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java @@ -66,6 +66,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { // start of the field info data private long dirOffset; + private int version; + public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator termComp, String segmentSuffix, IOContext context) throws IOException { @@ -78,6 +80,11 @@ public class 
FixedGapTermsIndexReader extends TermsIndexReaderBase { try { readHeader(in); + + if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(in); + } + indexInterval = in.readVInt(); if (indexInterval < 1) { throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")"); @@ -124,7 +131,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { } private void readHeader(IndexInput input) throws IOException { - CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, + version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME, FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT); } @@ -273,7 +280,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { public void close() throws IOException {} private void seekDir(IndexInput input, long dirOffset) throws IOException { - input.seek(input.length() - 8); + if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { + input.seek(input.length() - CodecUtil.footerLength() - 8); + } else { + input.seek(input.length() - 8); + } dirOffset = input.readLong(); input.seek(dirOffset); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java index 4787b7b6c78..3ddfc4141ea 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; @@ -43,7 +42,7 @@ import java.io.IOException; * * @lucene.experimental */ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { - protected final IndexOutput out; + protected IndexOutput out; /** Extension of terms index file */ static final String TERMS_INDEX_EXTENSION = "tii"; @@ -52,7 +51,8 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { final static int VERSION_START = 0; final static int VERSION_APPEND_ONLY = 1; final static int VERSION_MONOTONIC_ADDRESSING = 2; - final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING; + final static int VERSION_CHECKSUM = 3; + final static int VERSION_CURRENT = VERSION_CHECKSUM; final static int BLOCKSIZE = 4096; final private int termIndexInterval; @@ -207,38 +207,42 @@ public class FixedGapTermsIndexWriter extends TermsIndexWriterBase { @Override public void close() throws IOException { - boolean success = false; - try { - final long dirStart = out.getFilePointer(); - final int fieldCount = fields.size(); - - int nonNullFieldCount = 0; - for(int i=0;i 0) { - nonNullFieldCount++; + if (out != null) { + boolean success = false; + try { + final long dirStart = out.getFilePointer(); + final int fieldCount = fields.size(); + + int nonNullFieldCount = 0; + for(int i=0;i 0) { + nonNullFieldCount++; + } } - } - - out.writeVInt(nonNullFieldCount); - for(int i=0;i 0) { - out.writeVInt(field.fieldInfo.number); - out.writeVInt(field.numIndexTerms); - out.writeVLong(field.termsStart); - out.writeVLong(field.indexStart); - 
out.writeVLong(field.packedIndexStart); - out.writeVLong(field.packedOffsetsStart); + + out.writeVInt(nonNullFieldCount); + for(int i=0;i 0) { + out.writeVInt(field.fieldInfo.number); + out.writeVInt(field.numIndexTerms); + out.writeVLong(field.termsStart); + out.writeVLong(field.indexStart); + out.writeVLong(field.packedIndexStart); + out.writeVLong(field.packedOffsetsStart); + } } - } - writeTrailer(dirStart); - success = true; - } finally { - if (success) { - IOUtils.close(out); - } else { - IOUtils.closeWhileHandlingException(out); + writeTrailer(dirStart); + CodecUtil.writeFooter(out); + success = true; + } finally { + if (success) { + IOUtils.close(out); + } else { + IOUtils.closeWhileHandlingException(out); + } + out = null; } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java index 914d661a11f..b49541c9b71 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java @@ -62,6 +62,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { try { version = readHeader(in); + + if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(in); + } seekDir(in, dirOffset); @@ -190,7 +194,10 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { public void close() throws IOException {} private void seekDir(IndexInput input, long dirOffset) throws IOException { - if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { + if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) { + input.seek(input.length() - CodecUtil.footerLength() - 8); + dirOffset = input.readLong(); + } else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { input.seek(input.length() - 8); dirOffset = input.readLong(); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java index 4b9be3672e0..33824481bc2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java @@ -45,7 +45,7 @@ import org.apache.lucene.util.fst.Util; * * @lucene.experimental */ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { - protected final IndexOutput out; + protected IndexOutput out; /** Extension of terms index file */ static final String TERMS_INDEX_EXTENSION = "tiv"; @@ -53,7 +53,8 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX"; final static int VERSION_START = 0; final static int VERSION_APPEND_ONLY = 1; - final static int VERSION_CURRENT = VERSION_APPEND_ONLY; + final static int VERSION_CHECKSUM = 2; + final static int VERSION_CURRENT = VERSION_CHECKSUM; private final List fields = new ArrayList<>(); @@ -290,30 +291,34 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase { @Override public void close() throws IOException { - try { - final long dirStart = out.getFilePointer(); - final int fieldCount = fields.size(); - - int nonNullFieldCount = 0; - for(int i=0;i *

    *
 * <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
- * NumFilteredFields, Filter<sup>NumFilteredFields</sup>
+ * NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer
 * <li>Filter --> FieldNumber, FuzzySet
 * <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}
 * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}
@@ -75,13 +75,16 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 * <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}
 * <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
 * field in this segment
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}
 * </ul>
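Reading a .blm file laid out as above reduces to the following sketch, condensed from the fieldsProducer changes in this patch (error handling omitted; FuzzySet.deserialize is assumed to mirror the serialize method referenced in the list):

// Open with a checksumming input so the new Footer can be verified.
ChecksumIndexInput bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
String delegateName = bloomIn.readString();        // DelegatePostingsFormatName
int numFilteredFields = bloomIn.readInt();         // NumFilteredFields
for (int i = 0; i < numFilteredFields; i++) {
  int fieldNum = bloomIn.readInt();                // FieldNumber
  FuzzySet bloom = FuzzySet.deserialize(bloomIn);  // FuzzySet
}
if (version >= VERSION_CHECKSUM) {
  CodecUtil.checkFooter(bloomIn);                  // Footer: verify the trailing checksum
} else {
  CodecUtil.checkEOF(bloomIn);                     // pre-checksum files: require clean EOF
}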
* @lucene.experimental */ public final class BloomFilteringPostingsFormat extends PostingsFormat { public static final String BLOOM_CODEC_NAME = "BloomFilter"; - public static final int BLOOM_CODEC_VERSION = 1; + public static final int VERSION_START = 1; + public static final int VERSION_CHECKSUM = 2; + public static final int VERSION_CURRENT = VERSION_CHECKSUM; /** Extension of Bloom Filters file */ static final String BLOOM_EXTENSION = "blm"; @@ -157,12 +160,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { String bloomFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); - IndexInput bloomIn = null; + ChecksumIndexInput bloomIn = null; boolean success = false; try { - bloomIn = state.directory.openInput(bloomFileName, state.context); - CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, - BLOOM_CODEC_VERSION); + bloomIn = state.directory.openChecksumInput(bloomFileName, state.context); + int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT); // // Load the hash function used in the BloomFilter // hashFunction = HashFunction.forName(bloomIn.readString()); // Load the delegate postings format @@ -178,6 +180,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum); bloomsByFieldName.put(fieldInfo.name, bloom); } + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(bloomIn); + } else { + CodecUtil.checkEOF(bloomIn); + } IOUtils.close(bloomIn); success = true; } finally { @@ -390,6 +397,11 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException { + delegateFieldsProducer.checkIntegrity(); + } } class BloomFilteredFieldsConsumer extends FieldsConsumer { @@ -466,10 +478,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION); IndexOutput bloomOutput = null; try { - bloomOutput = state.directory - .createOutput(bloomFileName, state.context); - CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, - BLOOM_CODEC_VERSION); + bloomOutput = state.directory.createOutput(bloomFileName, state.context); + CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT); // remember the name of the postings format we will delegate to bloomOutput.writeString(delegatePostingsFormat.getName()); @@ -481,6 +491,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { bloomOutput.writeInt(fieldInfo.number); saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo); } + CodecUtil.writeFooter(bloomOutput); } finally { IOUtils.close(bloomOutput); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java deleted file mode 100644 index 4f15ce61c0d..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java +++ /dev/null @@ -1,171 +0,0 @@ -package org.apache.lucene.codecs.intblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Naive int block API that writes vInts. This is - * expected to give poor performance; it's really only for - * testing the pluggability. One should typically use pfor instead. */ - -import java.io.IOException; - -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.IndexInput; - -/** Abstract base class that reads fixed-size blocks of ints - * from an IndexInput. While this is a simple approach, a - * more performant approach would directly create an impl - * of IntIndexInput inside Directory. Wrapping a generic - * IndexInput will likely cost performance. - * - * @lucene.experimental - */ -public abstract class FixedIntBlockIndexInput extends IntIndexInput { - - private final IndexInput in; - protected final int blockSize; - - public FixedIntBlockIndexInput(final IndexInput in) throws IOException { - this.in = in; - blockSize = in.readVInt(); - } - - @Override - public IntIndexInput.Reader reader() throws IOException { - final int[] buffer = new int[blockSize]; - final IndexInput clone = in.clone(); - // TODO: can this be simplified? - return new Reader(clone, buffer, this.getBlockReader(clone, buffer)); - } - - @Override - public void close() throws IOException { - in.close(); - } - - @Override - public IntIndexInput.Index index() { - return new Index(); - } - - protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) throws IOException; - - /** - * Interface for fixed-size block decoders. - *

- * Implementations should decode into the buffer in {@link #readBlock}. - */ - public interface BlockReader { - public void readBlock() throws IOException; - } - - private static class Reader extends IntIndexInput.Reader { - private final IndexInput in; - private final BlockReader blockReader; - private final int blockSize; - private final int[] pending; - - private int upto; - private boolean seekPending; - private long pendingFP; - private long lastBlockFP = -1; - - public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) { - this.in = in; - this.pending = pending; - this.blockSize = pending.length; - this.blockReader = blockReader; - upto = blockSize; - } - - void seek(final long fp, final int upto) { - assert upto < blockSize; - if (seekPending || fp != lastBlockFP) { - pendingFP = fp; - seekPending = true; - } - this.upto = upto; - } - - @Override - public int next() throws IOException { - if (seekPending) { - // Seek & load new block - in.seek(pendingFP); - lastBlockFP = pendingFP; - blockReader.readBlock(); - seekPending = false; - } else if (upto == blockSize) { - // Load new block - lastBlockFP = in.getFilePointer(); - blockReader.readBlock(); - upto = 0; - } - return pending[upto++]; - } - } - - private class Index extends IntIndexInput.Index { - private long fp; - private int upto; - - @Override - public void read(final DataInput indexIn, final boolean absolute) throws IOException { - if (absolute) { - upto = indexIn.readVInt(); - fp = indexIn.readVLong(); - } else { - final int uptoDelta = indexIn.readVInt(); - if ((uptoDelta & 1) == 1) { - // same block - upto += uptoDelta >>> 1; - } else { - // new block - upto = uptoDelta >>> 1; - fp += indexIn.readVLong(); - } - } - assert upto < blockSize; - } - - @Override - public void seek(final IntIndexInput.Reader other) throws IOException { - ((Reader) other).seek(fp, upto); - } - - @Override - public void copyFrom(final IntIndexInput.Index other) { - final Index idx = (Index) other; - fp = idx.fp; - upto = idx.upto; - } - - @Override - public Index clone() { - Index other = new Index(); - other.fp = fp; - other.upto = upto; - return other; - } - - @Override - public String toString() { - return "fp=" + fp + " upto=" + upto; - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java deleted file mode 100644 index 21fc2b80b42..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java +++ /dev/null @@ -1,128 +0,0 @@ -package org.apache.lucene.codecs.intblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Naive int block API that writes vInts. 
This is - * expected to give poor performance; it's really only for - * testing the pluggability. One should typically use pfor instead. */ - -import java.io.IOException; - -import org.apache.lucene.codecs.sep.IntIndexOutput; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexOutput; - -/** Abstract base class that writes fixed-size blocks of ints - * to an IndexOutput. While this is a simple approach, a - * more performant approach would directly create an impl - * of IntIndexOutput inside Directory. Wrapping a generic - * IndexInput will likely cost performance. - * - * @lucene.experimental - */ -public abstract class FixedIntBlockIndexOutput extends IntIndexOutput { - - protected final IndexOutput out; - private final int blockSize; - protected final int[] buffer; - private int upto; - - protected FixedIntBlockIndexOutput(IndexOutput out, int fixedBlockSize) throws IOException { - blockSize = fixedBlockSize; - this.out = out; - out.writeVInt(blockSize); - buffer = new int[blockSize]; - } - - protected abstract void flushBlock() throws IOException; - - @Override - public IntIndexOutput.Index index() { - return new Index(); - } - - private class Index extends IntIndexOutput.Index { - long fp; - int upto; - long lastFP; - int lastUpto; - - @Override - public void mark() throws IOException { - fp = out.getFilePointer(); - upto = FixedIntBlockIndexOutput.this.upto; - } - - @Override - public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException { - Index idx = (Index) other; - fp = idx.fp; - upto = idx.upto; - if (copyLast) { - lastFP = fp; - lastUpto = upto; - } - } - - @Override - public void write(DataOutput indexOut, boolean absolute) throws IOException { - if (absolute) { - indexOut.writeVInt(upto); - indexOut.writeVLong(fp); - } else if (fp == lastFP) { - // same block - assert upto >= lastUpto; - int uptoDelta = upto - lastUpto; - indexOut.writeVInt(uptoDelta << 1 | 1); - } else { - // new block - indexOut.writeVInt(upto << 1); - indexOut.writeVLong(fp - lastFP); - } - lastUpto = upto; - lastFP = fp; - } - - @Override - public String toString() { - return "fp=" + fp + " upto=" + upto; - } - } - - @Override - public void write(int v) throws IOException { - buffer[upto++] = v; - if (upto == blockSize) { - flushBlock(); - upto = 0; - } - } - - @Override - public void close() throws IOException { - try { - if (upto > 0) { - // NOTE: entries in the block after current upto are - // invalid - flushBlock(); - } - } finally { - out.close(); - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java deleted file mode 100644 index 0e4d2983a94..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java +++ /dev/null @@ -1,198 +0,0 @@ -package org.apache.lucene.codecs.intblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Naive int block API that writes vInts. This is - * expected to give poor performance; it's really only for - * testing the pluggability. One should typically use pfor instead. */ - -import java.io.IOException; - -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.IndexInput; - -// TODO: much of this can be shared code w/ the fixed case - -/** Abstract base class that reads variable-size blocks of ints - * from an IndexInput. While this is a simple approach, a - * more performant approach would directly create an impl - * of IntIndexInput inside Directory. Wrapping a generic - * IndexInput will likely cost performance. - * - * @lucene.experimental - */ -public abstract class VariableIntBlockIndexInput extends IntIndexInput { - - protected final IndexInput in; - protected final int maxBlockSize; - - protected VariableIntBlockIndexInput(final IndexInput in) throws IOException { - this.in = in; - maxBlockSize = in.readInt(); - } - - @Override - public IntIndexInput.Reader reader() throws IOException { - final int[] buffer = new int[maxBlockSize]; - final IndexInput clone = in.clone(); - // TODO: can this be simplified? - return new Reader(clone, buffer, this.getBlockReader(clone, buffer)); - } - - @Override - public void close() throws IOException { - in.close(); - } - - @Override - public IntIndexInput.Index index() { - return new Index(); - } - - protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) throws IOException; - - /** - * Interface for variable-size block decoders. - *

- * Implementations should decode into the buffer in {@link #readBlock}. - */ - public interface BlockReader { - public int readBlock() throws IOException; - public void seek(long pos) throws IOException; - } - - private static class Reader extends IntIndexInput.Reader { - private final IndexInput in; - - public final int[] pending; - int upto; - - private boolean seekPending; - private long pendingFP; - private int pendingUpto; - private long lastBlockFP; - private int blockSize; - private final BlockReader blockReader; - - public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) { - this.in = in; - this.pending = pending; - this.blockReader = blockReader; - } - - void seek(final long fp, final int upto) { - // TODO: should we do this in real-time, not lazy? - pendingFP = fp; - pendingUpto = upto; - assert pendingUpto >= 0: "pendingUpto=" + pendingUpto; - seekPending = true; - } - - private final void maybeSeek() throws IOException { - if (seekPending) { - if (pendingFP != lastBlockFP) { - // need new block - in.seek(pendingFP); - blockReader.seek(pendingFP); - lastBlockFP = pendingFP; - blockSize = blockReader.readBlock(); - } - upto = pendingUpto; - - // TODO: if we were more clever when writing the - // index, such that a seek point wouldn't be written - // until the int encoder "committed", we could avoid - // this (likely minor) inefficiency: - - // This is necessary for int encoders that are - // non-causal, ie must see future int values to - // encode the current ones. - while(upto >= blockSize) { - upto -= blockSize; - lastBlockFP = in.getFilePointer(); - blockSize = blockReader.readBlock(); - } - seekPending = false; - } - } - - @Override - public int next() throws IOException { - this.maybeSeek(); - if (upto == blockSize) { - lastBlockFP = in.getFilePointer(); - blockSize = blockReader.readBlock(); - upto = 0; - } - - return pending[upto++]; - } - } - - private class Index extends IntIndexInput.Index { - private long fp; - private int upto; - - @Override - public void read(final DataInput indexIn, final boolean absolute) throws IOException { - if (absolute) { - upto = indexIn.readVInt(); - fp = indexIn.readVLong(); - } else { - final int uptoDelta = indexIn.readVInt(); - if ((uptoDelta & 1) == 1) { - // same block - upto += uptoDelta >>> 1; - } else { - // new block - upto = uptoDelta >>> 1; - fp += indexIn.readVLong(); - } - } - // TODO: we can't do this assert because non-causal - // int encoders can have upto over the buffer size - //assert upto < maxBlockSize: "upto=" + upto + " max=" + maxBlockSize; - } - - @Override - public String toString() { - return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize; - } - - @Override - public void seek(final IntIndexInput.Reader other) throws IOException { - ((Reader) other).seek(fp, upto); - } - - @Override - public void copyFrom(final IntIndexInput.Index other) { - final Index idx = (Index) other; - fp = idx.fp; - upto = idx.upto; - } - - @Override - public Index clone() { - Index other = new Index(); - other.fp = fp; - other.upto = upto; - return other; - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java deleted file mode 100644 index f8d401dc8b8..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java +++ /dev/null @@ -1,136 +0,0 @@ -package 
org.apache.lucene.codecs.intblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Naive int block API that writes vInts. This is - * expected to give poor performance; it's really only for - * testing the pluggability. One should typically use pfor instead. */ - -import java.io.IOException; - -import org.apache.lucene.codecs.sep.IntIndexOutput; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexOutput; - -// TODO: much of this can be shared code w/ the fixed case - -/** Abstract base class that writes variable-size blocks of ints - * to an IndexOutput. While this is a simple approach, a - * more performant approach would directly create an impl - * of IntIndexOutput inside Directory. Wrapping a generic - * IndexInput will likely cost performance. - * - * @lucene.experimental - */ -public abstract class VariableIntBlockIndexOutput extends IntIndexOutput { - - protected final IndexOutput out; - - private int upto; - private boolean hitExcDuringWrite; - - // TODO what Var-Var codecs exist in practice... and what are there blocksizes like? - // if its less than 128 we should set that as max and use byte? - - /** NOTE: maxBlockSize must be the maximum block size - * plus the max non-causal lookahead of your codec. EG Simple9 - * requires lookahead=1 because on seeing the Nth value - * it knows it must now encode the N-1 values before it. */ - protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) throws IOException { - this.out = out; - out.writeInt(maxBlockSize); - } - - /** Called one value at a time. Return the number of - * buffered input values that have been written to out. 
*/ - protected abstract int add(int value) throws IOException; - - @Override - public IntIndexOutput.Index index() { - return new Index(); - } - - private class Index extends IntIndexOutput.Index { - long fp; - int upto; - long lastFP; - int lastUpto; - - @Override - public void mark() throws IOException { - fp = out.getFilePointer(); - upto = VariableIntBlockIndexOutput.this.upto; - } - - @Override - public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException { - Index idx = (Index) other; - fp = idx.fp; - upto = idx.upto; - if (copyLast) { - lastFP = fp; - lastUpto = upto; - } - } - - @Override - public void write(DataOutput indexOut, boolean absolute) throws IOException { - assert upto >= 0; - if (absolute) { - indexOut.writeVInt(upto); - indexOut.writeVLong(fp); - } else if (fp == lastFP) { - // same block - assert upto >= lastUpto; - int uptoDelta = upto - lastUpto; - indexOut.writeVInt(uptoDelta << 1 | 1); - } else { - // new block - indexOut.writeVInt(upto << 1); - indexOut.writeVLong(fp - lastFP); - } - lastUpto = upto; - lastFP = fp; - } - } - - @Override - public void write(int v) throws IOException { - hitExcDuringWrite = true; - upto -= add(v)-1; - hitExcDuringWrite = false; - assert upto >= 0; - } - - @Override - public void close() throws IOException { - try { - if (!hitExcDuringWrite) { - // stuff 0s in until the "real" data is flushed: - int stuffed = 0; - while(upto > stuffed) { - upto -= add(0)-1; - assert upto >= 0; - stuffed += 1; - } - } - } finally { - out.close(); - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java index a255d8a115a..9c3b7bca0c3 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java @@ -40,7 +40,7 @@ import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER; */ class DirectDocValuesConsumer extends DocValuesConsumer { - final IndexOutput data, meta; + IndexOutput data, meta; final int maxDoc; DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { @@ -142,6 +142,10 @@ class DirectDocValuesConsumer extends DocValuesConsumer { try { if (meta != null) { meta.writeVInt(-1); // write EOF marker + CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { + CodecUtil.writeFooter(data); } success = true; } finally { @@ -150,6 +154,7 @@ class DirectDocValuesConsumer extends DocValuesConsumer { } else { IOUtils.closeWhileHandlingException(data, meta); } + data = meta = null; } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java index cbca82eb374..c81448e9715 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.RandomAccessOrds; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import 
org.apache.lucene.util.BytesRef; @@ -65,6 +66,7 @@ class DirectDocValuesProducer extends DocValuesProducer { private final int maxDoc; private final AtomicLong ramBytesUsed; + private final int version; static final byte NUMBER = 0; static final byte BYTES = 1; @@ -72,22 +74,27 @@ class DirectDocValuesProducer extends DocValuesProducer { static final byte SORTED_SET = 3; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_CHECKSUM = 1; + static final int VERSION_CURRENT = VERSION_CHECKSUM; DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.getDocCount(); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. - IndexInput in = state.directory.openInput(metaName, state.context); + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); boolean success = false; - final int version; try { version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); readFields(in); + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(in); + } else { + CodecUtil.checkEOF(in); + } success = true; } finally { if (success) { @@ -185,6 +192,13 @@ class DirectDocValuesProducer extends DocValuesProducer { return ramBytesUsed.get(); } + @Override + public void checkIntegrity() throws IOException { + if (version >= VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(data); + } + } + @Override public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException { NumericDocValues instance = numericInstances.get(field.number); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index d0e04f2177d..2ee89dd28ee 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -109,6 +109,7 @@ public final class DirectPostingsFormat extends PostingsFormat { if (state.context.context != IOContext.Context.MERGE) { FieldsProducer loadedPostings; try { + postings.checkIntegrity(); loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff); } finally { postings.close(); @@ -157,6 +158,12 @@ public final class DirectPostingsFormat extends PostingsFormat { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException { + // if we read entirely into ram, we already validated. 
+ // otherwise returned the raw postings reader + } } private final static class DirectField extends Terms { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 7a90867359f..2c0bc5fd233 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.automaton.ByteRunAutomaton; @@ -56,14 +57,13 @@ import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader; /** * FST-based terms dictionary reader. * * The FST index maps each term and its ord, and during seek * the ord is used fetch metadata from a single block. - * The term dictionary is fully memeory resident. + * The term dictionary is fully memory resident. * * @lucene.experimental */ @@ -71,8 +71,7 @@ public class FSTOrdTermsReader extends FieldsProducer { static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL; final TreeMap fields = new TreeMap<>(); final PostingsReaderBase postingsReader; - IndexInput indexIn = null; - IndexInput blockIn = null; + int version; //static final boolean TEST = false; public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { @@ -80,11 +79,18 @@ public class FSTOrdTermsReader extends FieldsProducer { final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION); this.postingsReader = postingsReader; + ChecksumIndexInput indexIn = null; + IndexInput blockIn = null; + boolean success = false; try { - this.indexIn = state.directory.openInput(termsIndexFileName, state.context); - this.blockIn = state.directory.openInput(termsBlockFileName, state.context); - readHeader(indexIn); + indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context); + blockIn = state.directory.openInput(termsBlockFileName, state.context); + version = readHeader(indexIn); readHeader(blockIn); + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(blockIn); + } + this.postingsReader.init(blockIn); seekDir(blockIn); @@ -100,12 +106,22 @@ public class FSTOrdTermsReader extends FieldsProducer { int longsSize = blockIn.readVInt(); FST index = new FST<>(indexIn, PositiveIntOutputs.getSingleton()); - TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); + TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); TermsReader previous = fields.put(fieldInfo.name, current); - checkFieldSummary(state.segmentInfo, current, previous); + checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous); } + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { + CodecUtil.checkFooter(indexIn); + } else { + CodecUtil.checkEOF(indexIn); + } + 
success = true; } finally { - IOUtils.closeWhileHandlingException(indexIn, blockIn); + if (success) { + IOUtils.close(indexIn, blockIn); + } else { + IOUtils.closeWhileHandlingException(indexIn, blockIn); + } } } @@ -115,10 +131,14 @@ public class FSTOrdTermsReader extends FieldsProducer { FSTOrdTermsWriter.TERMS_VERSION_CURRENT); } private void seekDir(IndexInput in) throws IOException { - in.seek(in.length() - 8); + if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) { + in.seek(in.length() - CodecUtil.footerLength() - 8); + } else { + in.seek(in.length() - 8); + } in.seek(in.readLong()); } - private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException { + private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException { // #docs with field must be <= #docs if (field.docCount < 0 || field.docCount > info.getDocCount()) { throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")"); @@ -176,7 +196,7 @@ public class FSTOrdTermsReader extends FieldsProducer { final byte[] metaLongsBlock; final byte[] metaBytesBlock; - TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST index) throws IOException { + TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST index) throws IOException { this.fieldInfo = fieldInfo; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; @@ -819,4 +839,9 @@ public class FSTOrdTermsReader extends FieldsProducer { } return ramBytesUsed; } + + @Override + public void checkIntegrity() throws IOException { + postingsReader.checkIntegrity(); + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java index d854c36029a..4bb8f0c6444 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java @@ -73,9 +73,10 @@ import org.apache.lucene.util.fst.Util; *

 *
 * <ul>
- *  <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
+ *  <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
 *  <li>TermFST --> {@link FST FST<long>}</li>
 *  <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ *  <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 *
 * <p>Notes:</p>
@@ -103,7 +104,7 @@ import org.apache.lucene.util.fst.Util;
 * <ul>
 *  <li>TermBlock(.tbk) --> Header, PostingsHeader, FieldSummary, DirOffset</li>
 *  <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- *                        DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
+ *                        DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
 *
 *  <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
 *                    SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock</li>
@@ -119,6 +120,7 @@ import org.apache.lucene.util.fst.Util;
 *  <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
 *        StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
 *        LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
+ *  <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>Notes: </p>
 * <ul>
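[Editor's note] The Footer entries added to the javadoc above correspond to the checksum trailer that this patch appends to codec files. As a rough, illustrative sketch only (the class name, codec name, file name, and version constant below are made up for the example and are not part of this patch), the write/verify pattern behind the CodecUtil calls used throughout this diff looks roughly like:

```java
// Illustrative sketch only -- not part of this patch.
import java.io.IOException;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

class FooterExample {                        // hypothetical helper, for illustration
  static final String CODEC = "ExampleMeta"; // hypothetical codec name
  static final int VERSION_CURRENT = 1;      // hypothetical version

  // Write: CodecHeader, then the file body, then the CodecFooter.
  static void write(Directory dir, String fileName) throws IOException {
    IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT);
    try {
      CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT); // CodecHeader
      out.writeVLong(42);                                 // file body (whatever the format defines)
      CodecUtil.writeFooter(out);                         // CodecFooter: magic, algorithm id, zlib-crc32
    } finally {
      out.close();
    }
  }

  // Read a small metadata file and verify its checksum on load.
  static void readAndCheck(Directory dir, String fileName) throws IOException {
    ChecksumIndexInput in = dir.openChecksumInput(fileName, IOContext.READONCE);
    try {
      CodecUtil.checkHeader(in, CODEC, VERSION_CURRENT, VERSION_CURRENT);
      in.readVLong();            // consume the body
      CodecUtil.checkFooter(in); // throws CorruptIndexException on mismatch
    } finally {
      in.close();
    }
  }

  // Verify a large data file on demand, as the checkIntegrity()
  // implementations in this patch do for files not loaded into RAM.
  static void checkIntegrity(Directory dir, String fileName) throws IOException {
    IndexInput data = dir.openInput(fileName, IOContext.READONCE);
    try {
      CodecUtil.checksumEntireFile(data);
    } finally {
      data.close();
    }
  }
}
```

Files written before the new checksum versions are still readable: as the version checks in this patch show, readers fall back to CodecUtil.checkEOF for older files instead of expecting a footer.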
    @@ -148,7 +150,8 @@ public class FSTOrdTermsWriter extends FieldsConsumer { static final String TERMS_BLOCK_EXTENSION = "tbk"; static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT"; public static final int TERMS_VERSION_START = 0; - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START; + public static final int TERMS_VERSION_CHECKSUM = 1; + public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM; public static final int SKIP_INTERVAL = 8; final PostingsWriterBase postingsWriter; @@ -218,36 +221,41 @@ public class FSTOrdTermsWriter extends FieldsConsumer { } public void close() throws IOException { - IOException ioe = null; - try { - final long blockDirStart = blockOut.getFilePointer(); - - // write field summary - blockOut.writeVInt(fields.size()); - for (FieldMetaData field : fields) { - blockOut.writeVInt(field.fieldInfo.number); - blockOut.writeVLong(field.numTerms); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - blockOut.writeVLong(field.sumTotalTermFreq); + if (blockOut != null) { + IOException ioe = null; + try { + final long blockDirStart = blockOut.getFilePointer(); + + // write field summary + blockOut.writeVInt(fields.size()); + for (FieldMetaData field : fields) { + blockOut.writeVInt(field.fieldInfo.number); + blockOut.writeVLong(field.numTerms); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + blockOut.writeVLong(field.sumTotalTermFreq); + } + blockOut.writeVLong(field.sumDocFreq); + blockOut.writeVInt(field.docCount); + blockOut.writeVInt(field.longsSize); + blockOut.writeVLong(field.statsOut.getFilePointer()); + blockOut.writeVLong(field.metaLongsOut.getFilePointer()); + blockOut.writeVLong(field.metaBytesOut.getFilePointer()); + + field.skipOut.writeTo(blockOut); + field.statsOut.writeTo(blockOut); + field.metaLongsOut.writeTo(blockOut); + field.metaBytesOut.writeTo(blockOut); + field.dict.save(indexOut); } - blockOut.writeVLong(field.sumDocFreq); - blockOut.writeVInt(field.docCount); - blockOut.writeVInt(field.longsSize); - blockOut.writeVLong(field.statsOut.getFilePointer()); - blockOut.writeVLong(field.metaLongsOut.getFilePointer()); - blockOut.writeVLong(field.metaBytesOut.getFilePointer()); - - field.skipOut.writeTo(blockOut); - field.statsOut.writeTo(blockOut); - field.metaLongsOut.writeTo(blockOut); - field.metaBytesOut.writeTo(blockOut); - field.dict.save(indexOut); + writeTrailer(blockOut, blockDirStart); + CodecUtil.writeFooter(indexOut); + CodecUtil.writeFooter(blockOut); + } catch (IOException ioe2) { + ioe = ioe2; + } finally { + IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter); + blockOut = null; } - writeTrailer(blockOut, blockDirStart); - } catch (IOException ioe2) { - ioe = ioe2; - } finally { - IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index cfa4d03f326..477eb5958fe 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -59,7 +59,7 @@ import org.apache.lucene.codecs.CodecUtil; * FST-based terms dictionary reader. * * The FST directly maps each term and its metadata, - * it is memeory resident. + * it is memory resident. 
* * @lucene.experimental */ @@ -67,18 +67,21 @@ import org.apache.lucene.codecs.CodecUtil; public class FSTTermsReader extends FieldsProducer { final TreeMap fields = new TreeMap<>(); final PostingsReaderBase postingsReader; - final IndexInput in; //static boolean TEST = false; + final int version; public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION); this.postingsReader = postingsReader; - this.in = state.directory.openInput(termsFileName, state.context); + final IndexInput in = state.directory.openInput(termsFileName, state.context); boolean success = false; try { - readHeader(in); + version = readHeader(in); + if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(in); + } this.postingsReader.init(in); seekDir(in); @@ -92,13 +95,15 @@ public class FSTTermsReader extends FieldsProducer { long sumDocFreq = in.readVLong(); int docCount = in.readVInt(); int longsSize = in.readVInt(); - TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); + TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); TermsReader previous = fields.put(fieldInfo.name, current); - checkFieldSummary(state.segmentInfo, current, previous); + checkFieldSummary(state.segmentInfo, in, current, previous); } success = true; } finally { - if (!success) { + if (success) { + IOUtils.close(in); + } else { IOUtils.closeWhileHandlingException(in); } } @@ -110,10 +115,14 @@ public class FSTTermsReader extends FieldsProducer { FSTTermsWriter.TERMS_VERSION_CURRENT); } private void seekDir(IndexInput in) throws IOException { - in.seek(in.length() - 8); + if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) { + in.seek(in.length() - CodecUtil.footerLength() - 8); + } else { + in.seek(in.length() - 8); + } in.seek(in.readLong()); } - private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException { + private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException { // #docs with field must be <= #docs if (field.docCount < 0 || field.docCount > info.getDocCount()) { throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")"); @@ -150,7 +159,7 @@ public class FSTTermsReader extends FieldsProducer { @Override public void close() throws IOException { try { - IOUtils.close(in, postingsReader); + IOUtils.close(postingsReader); } finally { fields.clear(); } @@ -165,7 +174,7 @@ public class FSTTermsReader extends FieldsProducer { final int longsSize; final FST dict; - TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { + TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { this.fieldInfo = fieldInfo; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; @@ -729,4 +738,9 @@ public class FSTTermsReader extends FieldsProducer { } return ramBytesUsed; } + + @Override + public void checkIntegrity() throws IOException { + postingsReader.checkIntegrity(); + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java 
b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java index 433a24007f0..6e29ff5db5a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java @@ -124,11 +124,12 @@ public class FSTTermsWriter extends FieldsConsumer { static final String TERMS_EXTENSION = "tmp"; static final String TERMS_CODEC_NAME = "FST_TERMS_DICT"; public static final int TERMS_VERSION_START = 0; - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START; + public static final int TERMS_VERSION_CHECKSUM = 1; + public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM; final PostingsWriterBase postingsWriter; final FieldInfos fieldInfos; - final IndexOutput out; + IndexOutput out; final int maxDoc; final List fields = new ArrayList<>(); @@ -199,28 +200,32 @@ public class FSTTermsWriter extends FieldsConsumer { } public void close() throws IOException { - IOException ioe = null; - try { - // write field summary - final long dirStart = out.getFilePointer(); - - out.writeVInt(fields.size()); - for (FieldMetaData field : fields) { - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - out.writeVLong(field.sumTotalTermFreq); + if (out != null) { + IOException ioe = null; + try { + // write field summary + final long dirStart = out.getFilePointer(); + + out.writeVInt(fields.size()); + for (FieldMetaData field : fields) { + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); + } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); + out.writeVInt(field.longsSize); + field.dict.save(out); } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); - out.writeVInt(field.longsSize); - field.dict.save(out); + writeTrailer(out, dirStart); + CodecUtil.writeFooter(out); + } catch (IOException ioe2) { + ioe = ioe2; + } finally { + IOUtils.closeWhileHandlingException(ioe, out, postingsWriter); + out = null; } - writeTrailer(out, dirStart); - } catch (IOException ioe2) { - ioe = ioe2; - } finally { - IOUtils.closeWhileHandlingException(ioe, out, postingsWriter); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java index 5365a813f64..65cae92efd5 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java @@ -59,7 +59,7 @@ import static org.apache.lucene.codecs.memory.MemoryDocValuesProducer.UNCOMPRESS * Writer for {@link MemoryDocValuesFormat} */ class MemoryDocValuesConsumer extends DocValuesConsumer { - final IndexOutput data, meta; + IndexOutput data, meta; final int maxDoc; final float acceptableOverheadRatio; @@ -208,6 +208,10 @@ class MemoryDocValuesConsumer extends DocValuesConsumer { try { if (meta != null) { meta.writeVInt(-1); // write EOF marker + CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { + CodecUtil.writeFooter(data); } success = true; } finally { @@ -216,6 +220,7 @@ class MemoryDocValuesConsumer extends DocValuesConsumer { } else { IOUtils.closeWhileHandlingException(data, meta); } + data = meta = null; } } diff --git 
a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java index 4b75e88332e..79790ebc078 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -77,6 +78,7 @@ class MemoryDocValuesProducer extends DocValuesProducer { private final int maxDoc; private final AtomicLong ramBytesUsed; + private final int version; static final byte NUMBER = 0; static final byte BYTES = 1; @@ -91,15 +93,15 @@ class MemoryDocValuesProducer extends DocValuesProducer { static final int VERSION_START = 0; static final int VERSION_GCD_COMPRESSION = 1; - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; + static final int VERSION_CHECKSUM = 2; + static final int VERSION_CURRENT = VERSION_CHECKSUM; MemoryDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.getDocCount(); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. - IndexInput in = state.directory.openInput(metaName, state.context); + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); boolean success = false; - final int version; try { version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, @@ -108,6 +110,11 @@ class MemoryDocValuesProducer extends DocValuesProducer { binaries = new HashMap<>(); fsts = new HashMap<>(); readFields(in, state.fieldInfos); + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(in); + } else { + CodecUtil.checkEOF(in); + } ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); success = true; } finally { @@ -208,6 +215,13 @@ class MemoryDocValuesProducer extends DocValuesProducer { return ramBytesUsed.get(); } + @Override + public void checkIntegrity() throws IOException { + if (version >= VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(data); + } + } + private NumericDocValues loadNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.number); data.seek(entry.offset + entry.missingBytes); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index 0614c9520a8..4cf947d1b88 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; @@ -41,6 +42,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; import 
org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -271,6 +273,9 @@ public final class MemoryPostingsFormat extends PostingsFormat { } private static String EXTENSION = "ram"; + private static final String CODEC_NAME = "MemoryPostings"; + private static final int VERSION_START = 0; + private static final int VERSION_CURRENT = VERSION_START; private class MemoryFieldsConsumer extends FieldsConsumer implements Closeable { private final SegmentWriteState state; @@ -279,6 +284,15 @@ public final class MemoryPostingsFormat extends PostingsFormat { private MemoryFieldsConsumer(SegmentWriteState state) throws IOException { final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); out = state.directory.createOutput(fileName, state.context); + boolean success = false; + try { + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(out); + } + } this.state = state; } @@ -403,6 +417,7 @@ public final class MemoryPostingsFormat extends PostingsFormat { // EOF marker: try { out.writeVInt(0); + CodecUtil.writeFooter(out); } finally { out.close(); } @@ -951,7 +966,8 @@ public final class MemoryPostingsFormat extends PostingsFormat { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION); - final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE); + final ChecksumIndexInput in = state.directory.openChecksumInput(fileName, IOContext.READONCE); + CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); final SortedMap fields = new TreeMap<>(); @@ -965,6 +981,7 @@ public final class MemoryPostingsFormat extends PostingsFormat { // System.out.println("load field=" + termsReader.field.name); fields.put(termsReader.field.name, termsReader); } + CodecUtil.checkFooter(in); } finally { in.close(); } @@ -1002,6 +1019,9 @@ public final class MemoryPostingsFormat extends PostingsFormat { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException {} }; } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java index 4a2e295f0fa..cadd84d568b 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java @@ -653,4 +653,9 @@ public class PulsingPostingsReader extends PostingsReaderBase { public long ramBytesUsed() { return ((wrappedPostingsReader!=null) ? 
wrappedPostingsReader.ramBytesUsed(): 0); } + + @Override + public void checkIntegrity() throws IOException { + wrappedPostingsReader.checkIntegrity(); + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java deleted file mode 100644 index 216f5904d04..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Closeable; -import java.io.IOException; - -import org.apache.lucene.store.DataInput; - -/** Defines basic API for writing ints to an IndexOutput. - * IntBlockCodec interacts with this API. @see - * IntBlockReader - * - * @lucene.experimental */ -public abstract class IntIndexInput implements Closeable { - - public abstract Reader reader() throws IOException; - - @Override - public abstract void close() throws IOException; - - public abstract Index index() throws IOException; - - /** Records a single skip-point in the {@link IntIndexInput.Reader}. */ - public abstract static class Index { - - public abstract void read(DataInput indexIn, boolean absolute) throws IOException; - - /** Seeks primary stream to the last read offset */ - public abstract void seek(IntIndexInput.Reader stream) throws IOException; - - public abstract void copyFrom(Index other); - - @Override - public abstract Index clone(); - } - - /** Reads int values. */ - public abstract static class Reader { - - /** Reads next single int */ - public abstract int next() throws IOException; - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java deleted file mode 100644 index f6523ee84a0..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// TODO: we may want tighter integration w/ IndexOutput -- -// may give better perf: - -import org.apache.lucene.store.DataOutput; - -import java.io.IOException; -import java.io.Closeable; - -/** Defines basic API for writing ints to an IndexOutput. - * IntBlockCodec interacts with this API. @see - * IntBlockReader. - * - *

    NOTE: block sizes could be variable - * - * @lucene.experimental */ -public abstract class IntIndexOutput implements Closeable { - - /** Write an int to the primary file. The value must be - * >= 0. */ - public abstract void write(int v) throws IOException; - - /** Records a single skip-point in the IndexOutput. */ - public abstract static class Index { - - /** Internally records the current location */ - public abstract void mark() throws IOException; - - /** Copies index from other */ - public abstract void copyFrom(Index other, boolean copyLast) throws IOException; - - /** Writes "location" of current output pointer of primary - * output to different output (out) */ - public abstract void write(DataOutput indexOut, boolean absolute) throws IOException; - } - - /** If you are indexing the primary output file, call - * this and interact with the returned IndexWriter. */ - public abstract Index index(); - - @Override - public abstract void close() throws IOException; -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java deleted file mode 100644 index d239f15d464..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java +++ /dev/null @@ -1,709 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.TermState; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** Concrete class that reads the current doc/freq/skip - * postings format. - * - * @lucene.experimental - */ - -// TODO: -- should we switch "hasProx" higher up? and -// create two separate docs readers, one that also reads -// prox and one that doesn't? 
- -public class SepPostingsReader extends PostingsReaderBase { - - final IntIndexInput freqIn; - final IntIndexInput docIn; - final IntIndexInput posIn; - final IndexInput payloadIn; - final IndexInput skipIn; - - int skipInterval; - int maxSkipLevels; - int skipMinimum; - - public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, String segmentSuffix) throws IOException { - boolean success = false; - try { - - final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION); - docIn = intFactory.openInput(dir, docFileName, context); - - skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context); - - if (fieldInfos.hasFreq()) { - freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context); - } else { - freqIn = null; - } - if (fieldInfos.hasProx()) { - posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context); - payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context); - } else { - posIn = null; - payloadIn = null; - } - success = true; - } finally { - if (!success) { - close(); - } - } - } - - @Override - public void init(IndexInput termsIn) throws IOException { - // Make sure we are talking to the matching past writer - CodecUtil.checkHeader(termsIn, SepPostingsWriter.CODEC, - SepPostingsWriter.VERSION_START, SepPostingsWriter.VERSION_START); - skipInterval = termsIn.readInt(); - maxSkipLevels = termsIn.readInt(); - skipMinimum = termsIn.readInt(); - } - - @Override - public void close() throws IOException { - IOUtils.close(freqIn, docIn, skipIn, posIn, payloadIn); - } - - private static final class SepTermState extends BlockTermState { - // We store only the seek point to the docs file because - // the rest of the info (freqIndex, posIndex, etc.) 
is - // stored in the docs file: - IntIndexInput.Index docIndex; - IntIndexInput.Index posIndex; - IntIndexInput.Index freqIndex; - long payloadFP; - long skipFP; - - @Override - public SepTermState clone() { - SepTermState other = new SepTermState(); - other.copyFrom(this); - return other; - } - - @Override - public void copyFrom(TermState _other) { - super.copyFrom(_other); - SepTermState other = (SepTermState) _other; - if (docIndex == null) { - docIndex = other.docIndex.clone(); - } else { - docIndex.copyFrom(other.docIndex); - } - if (other.freqIndex != null) { - if (freqIndex == null) { - freqIndex = other.freqIndex.clone(); - } else { - freqIndex.copyFrom(other.freqIndex); - } - } else { - freqIndex = null; - } - if (other.posIndex != null) { - if (posIndex == null) { - posIndex = other.posIndex.clone(); - } else { - posIndex.copyFrom(other.posIndex); - } - } else { - posIndex = null; - } - payloadFP = other.payloadFP; - skipFP = other.skipFP; - } - - @Override - public String toString() { - return super.toString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex + " payloadFP=" + payloadFP + " skipFP=" + skipFP; - } - } - - @Override - public BlockTermState newTermState() throws IOException { - final SepTermState state = new SepTermState(); - state.docIndex = docIn.index(); - if (freqIn != null) { - state.freqIndex = freqIn.index(); - } - if (posIn != null) { - state.posIndex = posIn.index(); - } - return state; - } - - @Override - public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) - throws IOException { - final SepTermState termState = (SepTermState) _termState; - termState.docIndex.read(in, absolute); - if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - termState.freqIndex.read(in, absolute); - if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - //System.out.println(" freqIndex=" + termState.freqIndex); - termState.posIndex.read(in, absolute); - //System.out.println(" posIndex=" + termState.posIndex); - if (fieldInfo.hasPayloads()) { - if (absolute) { - termState.payloadFP = in.readVLong(); - } else { - termState.payloadFP += in.readVLong(); - } - //System.out.println(" payloadFP=" + termState.payloadFP); - } - } - } - - if (termState.docFreq >= skipMinimum) { - //System.out.println(" readSkip @ " + in.getPosition()); - if (absolute) { - termState.skipFP = in.readVLong(); - } else { - termState.skipFP += in.readVLong(); - } - //System.out.println(" skipFP=" + termState.skipFP); - } else if (absolute) { - termState.skipFP = 0; - } - } - - @Override - public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - final SepTermState termState = (SepTermState) _termState; - SepDocsEnum docsEnum; - if (reuse == null || !(reuse instanceof SepDocsEnum)) { - docsEnum = new SepDocsEnum(); - } else { - docsEnum = (SepDocsEnum) reuse; - if (docsEnum.startDocIn != docIn) { - // If you are using ParellelReader, and pass in a - // reused DocsAndPositionsEnum, it could have come - // from another reader also using sep codec - docsEnum = new SepDocsEnum(); - } - } - - return docsEnum.init(fieldInfo, termState, liveDocs); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, - DocsAndPositionsEnum reuse, int flags) - throws IOException { - - assert fieldInfo.getIndexOptions() == 
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - final SepTermState termState = (SepTermState) _termState; - SepDocsAndPositionsEnum postingsEnum; - if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) { - postingsEnum = new SepDocsAndPositionsEnum(); - } else { - postingsEnum = (SepDocsAndPositionsEnum) reuse; - if (postingsEnum.startDocIn != docIn) { - // If you are using ParellelReader, and pass in a - // reused DocsAndPositionsEnum, it could have come - // from another reader also using sep codec - postingsEnum = new SepDocsAndPositionsEnum(); - } - } - - return postingsEnum.init(fieldInfo, termState, liveDocs); - } - - class SepDocsEnum extends DocsEnum { - int docFreq; - int doc = -1; - int accum; - int count; - int freq; - long freqStart; - - // TODO: -- should we do omitTF with 2 different enum classes? - private boolean omitTF; - private IndexOptions indexOptions; - private boolean storePayloads; - private Bits liveDocs; - private final IntIndexInput.Reader docReader; - private final IntIndexInput.Reader freqReader; - private long skipFP; - - private final IntIndexInput.Index docIndex; - private final IntIndexInput.Index freqIndex; - private final IntIndexInput.Index posIndex; - private final IntIndexInput startDocIn; - - // TODO: -- should we do hasProx with 2 different enum classes? - - boolean skipped; - SepSkipListReader skipper; - - SepDocsEnum() throws IOException { - startDocIn = docIn; - docReader = docIn.reader(); - docIndex = docIn.index(); - if (freqIn != null) { - freqReader = freqIn.reader(); - freqIndex = freqIn.index(); - } else { - freqReader = null; - freqIndex = null; - } - if (posIn != null) { - posIndex = posIn.index(); // only init this so skipper can read it - } else { - posIndex = null; - } - } - - SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException { - this.liveDocs = liveDocs; - this.indexOptions = fieldInfo.getIndexOptions(); - omitTF = indexOptions == IndexOptions.DOCS_ONLY; - storePayloads = fieldInfo.hasPayloads(); - - // TODO: can't we only do this if consumer - // skipped consuming the previous docs? 
- docIndex.copyFrom(termState.docIndex); - docIndex.seek(docReader); - - if (!omitTF) { - freqIndex.copyFrom(termState.freqIndex); - freqIndex.seek(freqReader); - } - - docFreq = termState.docFreq; - // NOTE: unused if docFreq < skipMinimum: - skipFP = termState.skipFP; - count = 0; - doc = -1; - accum = 0; - freq = 1; - skipped = false; - - return this; - } - - @Override - public int nextDoc() throws IOException { - - while(true) { - if (count == docFreq) { - return doc = NO_MORE_DOCS; - } - - count++; - - // Decode next doc - //System.out.println("decode docDelta:"); - accum += docReader.next(); - - if (!omitTF) { - //System.out.println("decode freq:"); - freq = freqReader.next(); - } - - if (liveDocs == null || liveDocs.get(accum)) { - break; - } - } - return (doc = accum); - } - - @Override - public int freq() throws IOException { - return freq; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int advance(int target) throws IOException { - - if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { - - // There are enough docs in the posting to have - // skip data, and its not too close - - if (skipper == null) { - // This DocsEnum has never done any skipping - skipper = new SepSkipListReader(skipIn.clone(), - freqIn, - docIn, - posIn, - maxSkipLevels, skipInterval); - - } - - if (!skipped) { - // We haven't yet skipped for this posting - skipper.init(skipFP, - docIndex, - freqIndex, - posIndex, - 0, - docFreq, - storePayloads); - skipper.setIndexOptions(indexOptions); - - skipped = true; - } - - final int newCount = skipper.skipTo(target); - - if (newCount > count) { - - // Skipper did move - if (!omitTF) { - skipper.getFreqIndex().seek(freqReader); - } - skipper.getDocIndex().seek(docReader); - count = newCount; - doc = accum = skipper.getDoc(); - } - } - - // Now, linear scan for the rest: - do { - if (nextDoc() == NO_MORE_DOCS) { - return NO_MORE_DOCS; - } - } while (target > doc); - - return doc; - } - - @Override - public long cost() { - return docFreq; - } - } - - class SepDocsAndPositionsEnum extends DocsAndPositionsEnum { - int docFreq; - int doc = -1; - int accum; - int count; - int freq; - long freqStart; - - private boolean storePayloads; - private Bits liveDocs; - private final IntIndexInput.Reader docReader; - private final IntIndexInput.Reader freqReader; - private final IntIndexInput.Reader posReader; - private final IndexInput payloadIn; - private long skipFP; - - private final IntIndexInput.Index docIndex; - private final IntIndexInput.Index freqIndex; - private final IntIndexInput.Index posIndex; - private final IntIndexInput startDocIn; - - private long payloadFP; - - private int pendingPosCount; - private int position; - private int payloadLength; - private long pendingPayloadBytes; - - private boolean skipped; - private SepSkipListReader skipper; - private boolean payloadPending; - private boolean posSeekPending; - - SepDocsAndPositionsEnum() throws IOException { - startDocIn = docIn; - docReader = docIn.reader(); - docIndex = docIn.index(); - freqReader = freqIn.reader(); - freqIndex = freqIn.index(); - posReader = posIn.reader(); - posIndex = posIn.index(); - payloadIn = SepPostingsReader.this.payloadIn.clone(); - } - - SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException { - this.liveDocs = liveDocs; - storePayloads = fieldInfo.hasPayloads(); - //System.out.println("Sep D&P init"); - - // TODO: can't we only do this if consumer - // skipped consuming the previous docs? 
- docIndex.copyFrom(termState.docIndex); - docIndex.seek(docReader); - //System.out.println(" docIndex=" + docIndex); - - freqIndex.copyFrom(termState.freqIndex); - freqIndex.seek(freqReader); - //System.out.println(" freqIndex=" + freqIndex); - - posIndex.copyFrom(termState.posIndex); - //System.out.println(" posIndex=" + posIndex); - posSeekPending = true; - payloadPending = false; - - payloadFP = termState.payloadFP; - skipFP = termState.skipFP; - //System.out.println(" skipFP=" + skipFP); - - docFreq = termState.docFreq; - count = 0; - doc = -1; - accum = 0; - pendingPosCount = 0; - pendingPayloadBytes = 0; - skipped = false; - - return this; - } - - @Override - public int nextDoc() throws IOException { - - while(true) { - if (count == docFreq) { - return doc = NO_MORE_DOCS; - } - - count++; - - // TODO: maybe we should do the 1-bit trick for encoding - // freq=1 case? - - // Decode next doc - //System.out.println(" sep d&p read doc"); - accum += docReader.next(); - - //System.out.println(" sep d&p read freq"); - freq = freqReader.next(); - - pendingPosCount += freq; - - if (liveDocs == null || liveDocs.get(accum)) { - break; - } - } - - position = 0; - return (doc = accum); - } - - @Override - public int freq() throws IOException { - return freq; - } - - @Override - public int docID() { - return doc; - } - - @Override - public int advance(int target) throws IOException { - //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this); - - if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { - - // There are enough docs in the posting to have - // skip data, and its not too close - - if (skipper == null) { - //System.out.println(" create skipper"); - // This DocsEnum has never done any skipping - skipper = new SepSkipListReader(skipIn.clone(), - freqIn, - docIn, - posIn, - maxSkipLevels, skipInterval); - } - - if (!skipped) { - //System.out.println(" init skip data skipFP=" + skipFP); - // We haven't yet skipped for this posting - skipper.init(skipFP, - docIndex, - freqIndex, - posIndex, - payloadFP, - docFreq, - storePayloads); - skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); - skipped = true; - } - final int newCount = skipper.skipTo(target); - //System.out.println(" skip newCount=" + newCount + " vs " + count); - - if (newCount > count) { - - // Skipper did move - skipper.getFreqIndex().seek(freqReader); - skipper.getDocIndex().seek(docReader); - //System.out.println(" doc seek'd to " + skipper.getDocIndex()); - // NOTE: don't seek pos here; do it lazily - // instead. Eg a PhraseQuery may skip to many - // docs before finally asking for positions... 
- posIndex.copyFrom(skipper.getPosIndex()); - posSeekPending = true; - count = newCount; - doc = accum = skipper.getDoc(); - //System.out.println(" moved to doc=" + doc); - //payloadIn.seek(skipper.getPayloadPointer()); - payloadFP = skipper.getPayloadPointer(); - pendingPosCount = 0; - pendingPayloadBytes = 0; - payloadPending = false; - payloadLength = skipper.getPayloadLength(); - //System.out.println(" move payloadLen=" + payloadLength); - } - } - - // Now, linear scan for the rest: - do { - if (nextDoc() == NO_MORE_DOCS) { - //System.out.println(" advance nextDoc=END"); - return NO_MORE_DOCS; - } - //System.out.println(" advance nextDoc=" + doc); - } while (target > doc); - - //System.out.println(" return doc=" + doc); - return doc; - } - - @Override - public int nextPosition() throws IOException { - if (posSeekPending) { - posIndex.seek(posReader); - payloadIn.seek(payloadFP); - posSeekPending = false; - } - - // scan over any docs that were iterated without their - // positions - while (pendingPosCount > freq) { - final int code = posReader.next(); - if (storePayloads && (code & 1) != 0) { - // Payload length has changed - payloadLength = posReader.next(); - assert payloadLength >= 0; - } - pendingPosCount--; - position = 0; - pendingPayloadBytes += payloadLength; - } - - final int code = posReader.next(); - - if (storePayloads) { - if ((code & 1) != 0) { - // Payload length has changed - payloadLength = posReader.next(); - assert payloadLength >= 0; - } - position += code >>> 1; - pendingPayloadBytes += payloadLength; - payloadPending = payloadLength > 0; - } else { - position += code; - } - - pendingPosCount--; - assert pendingPosCount >= 0; - return position; - } - - @Override - public int startOffset() { - return -1; - } - - @Override - public int endOffset() { - return -1; - } - - private BytesRef payload; - - @Override - public BytesRef getPayload() throws IOException { - if (!payloadPending) { - return null; - } - - if (pendingPayloadBytes == 0) { - return payload; - } - - assert pendingPayloadBytes >= payloadLength; - - if (pendingPayloadBytes > payloadLength) { - payloadIn.seek(payloadIn.getFilePointer() + (pendingPayloadBytes - payloadLength)); - } - - if (payload == null) { - payload = new BytesRef(); - payload.bytes = new byte[payloadLength]; - } else if (payload.bytes.length < payloadLength) { - payload.grow(payloadLength); - } - - payloadIn.readBytes(payload.bytes, 0, payloadLength); - payload.length = payloadLength; - pendingPayloadBytes = 0; - return payload; - } - - @Override - public long cost() { - return docFreq; - } - } - - @Override - public long ramBytesUsed() { - return 0; - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java deleted file mode 100644 index 7febda882ec..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java +++ /dev/null @@ -1,366 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.PushPostingsWriterBase; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** Writes frq to .frq, docs to .doc, pos to .pos, payloads - * to .pyl, skip data to .skp - * - * @lucene.experimental */ -public final class SepPostingsWriter extends PushPostingsWriterBase { - final static String CODEC = "SepPostingsWriter"; - - final static String DOC_EXTENSION = "doc"; - final static String SKIP_EXTENSION = "skp"; - final static String FREQ_EXTENSION = "frq"; - final static String POS_EXTENSION = "pos"; - final static String PAYLOAD_EXTENSION = "pyl"; - - // Increment version to change it: - final static int VERSION_START = 0; - final static int VERSION_CURRENT = VERSION_START; - - IntIndexOutput freqOut; - IntIndexOutput.Index freqIndex; - - IntIndexOutput posOut; - IntIndexOutput.Index posIndex; - - IntIndexOutput docOut; - IntIndexOutput.Index docIndex; - - IndexOutput payloadOut; - - IndexOutput skipOut; - - final SepSkipListWriter skipListWriter; - /** Expert: The fraction of TermDocs entries stored in skip tables, - * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in - * smaller indexes, greater acceleration, but fewer accelerable cases, while - * smaller values result in bigger indexes, less acceleration and more - * accelerable cases. More detailed experiments would be useful here. */ - final int skipInterval; - static final int DEFAULT_SKIP_INTERVAL = 16; - - /** - * Expert: minimum docFreq to write any skip data at all - */ - final int skipMinimum; - - /** Expert: The maximum number of skip levels. Smaller values result in - * slightly smaller indexes, but slower skipping in big posting lists. 
- */ - final int maxSkipLevels = 10; - - final int totalNumDocs; - - IndexOptions indexOptions; - - int lastPayloadLength; - int lastPosition; - long payloadStart; - int lastDocID; - int df; - - SepTermState lastState; - long lastPayloadFP; - long lastSkipFP; - - public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException { - this(state, factory, DEFAULT_SKIP_INTERVAL); - } - - public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException { - freqOut = null; - freqIndex = null; - posOut = null; - posIndex = null; - payloadOut = null; - boolean success = false; - try { - this.skipInterval = skipInterval; - this.skipMinimum = skipInterval; /* set to the same for now */ - final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION); - - docOut = factory.createOutput(state.directory, docFileName, state.context); - docIndex = docOut.index(); - - if (state.fieldInfos.hasFreq()) { - final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION); - freqOut = factory.createOutput(state.directory, frqFileName, state.context); - freqIndex = freqOut.index(); - } - - if (state.fieldInfos.hasProx()) { - final String posFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, POS_EXTENSION); - posOut = factory.createOutput(state.directory, posFileName, state.context); - posIndex = posOut.index(); - - // TODO: -- only if at least one field stores payloads? - final String payloadFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAYLOAD_EXTENSION); - payloadOut = state.directory.createOutput(payloadFileName, state.context); - } - - final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SKIP_EXTENSION); - skipOut = state.directory.createOutput(skipFileName, state.context); - - totalNumDocs = state.segmentInfo.getDocCount(); - - skipListWriter = new SepSkipListWriter(skipInterval, - maxSkipLevels, - totalNumDocs, - freqOut, docOut, - posOut, payloadOut); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(docOut, skipOut, freqOut, posOut, payloadOut); - } - } - } - - @Override - public void init(IndexOutput termsOut) throws IOException { - CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); - // TODO: -- just ask skipper to "start" here - termsOut.writeInt(skipInterval); // write skipInterval - termsOut.writeInt(maxSkipLevels); // write maxSkipLevels - termsOut.writeInt(skipMinimum); // write skipMinimum - } - - @Override - public BlockTermState newTermState() { - return new SepTermState(); - } - - @Override - public void startTerm() throws IOException { - docIndex.mark(); - //System.out.println("SEPW: startTerm docIndex=" + docIndex); - - if (indexOptions != IndexOptions.DOCS_ONLY) { - freqIndex.mark(); - } - - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - posIndex.mark(); - payloadStart = payloadOut.getFilePointer(); - lastPayloadLength = -1; - } - - skipListWriter.resetSkip(docIndex, freqIndex, posIndex); - } - - // Currently, this instance is re-used across fields, so - // our parent calls setField whenever the field changes - @Override - public int setField(FieldInfo fieldInfo) { - super.setField(fieldInfo); - this.indexOptions = fieldInfo.getIndexOptions(); - if 
(indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) { - throw new UnsupportedOperationException("this codec cannot index offsets"); - } - skipListWriter.setIndexOptions(indexOptions); - lastPayloadFP = 0; - lastSkipFP = 0; - lastState = setEmptyState(); - return 0; - } - - private SepTermState setEmptyState() { - SepTermState emptyState = new SepTermState(); - emptyState.docIndex = docOut.index(); - if (indexOptions != IndexOptions.DOCS_ONLY) { - emptyState.freqIndex = freqOut.index(); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - emptyState.posIndex = posOut.index(); - } - } - emptyState.payloadFP = 0; - emptyState.skipFP = 0; - return emptyState; - } - - /** Adds a new doc in this term. If this returns null - * then we just skip consuming positions/payloads. */ - @Override - public void startDoc(int docID, int termDocFreq) throws IOException { - - final int delta = docID - lastDocID; - //System.out.println("SEPW: startDoc: write doc=" + docID + " delta=" + delta + " out.fp=" + docOut); - - if (docID < 0 || (df > 0 && delta <= 0)) { - throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")"); - } - - if ((++df % skipInterval) == 0) { - // TODO: -- awkward we have to make these two - // separate calls to skipper - //System.out.println(" buffer skip lastDocID=" + lastDocID); - skipListWriter.setSkipData(lastDocID, writePayloads, lastPayloadLength); - skipListWriter.bufferSkip(df); - } - - lastDocID = docID; - docOut.write(delta); - if (indexOptions != IndexOptions.DOCS_ONLY) { - //System.out.println(" sepw startDoc: write freq=" + termDocFreq); - freqOut.write(termDocFreq); - } - } - - /** Add a new position & payload */ - @Override - public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException { - assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - - final int delta = position - lastPosition; - assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it) - lastPosition = position; - - if (writePayloads) { - final int payloadLength = payload == null ? 0 : payload.length; - if (payloadLength != lastPayloadLength) { - lastPayloadLength = payloadLength; - // TODO: explore whether we get better compression - // by not storing payloadLength into prox stream? - posOut.write((delta<<1)|1); - posOut.write(payloadLength); - } else { - posOut.write(delta << 1); - } - - if (payloadLength > 0) { - payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength); - } - } else { - posOut.write(delta); - } - - lastPosition = position; - } - - /** Called when we are done adding positions & payloads */ - @Override - public void finishDoc() { - lastPosition = 0; - } - - private static class SepTermState extends BlockTermState { - public IntIndexOutput.Index docIndex; - public IntIndexOutput.Index freqIndex; - public IntIndexOutput.Index posIndex; - public long payloadFP; - public long skipFP; - } - - /** Called when we are done adding docs to this term */ - @Override - public void finishTerm(BlockTermState _state) throws IOException { - SepTermState state = (SepTermState)_state; - // TODO: -- wasteful we are counting this in two places? 
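The removed addPosition() above relies on a small flag-bit trick: with payloads enabled, each position delta is left-shifted one bit, and the low bit is set only when the payload length differs from the previous one, in which case the new length follows as an extra value. A minimal standalone sketch of that encoding and its decoding; plain integer lists stand in for the codec's IntIndexOutput/IntIndexInput streams, and all names are illustrative.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only of the position-delta encoding described above.
public class SepPositionEncodingSketch {

  static List<Integer> encode(int[] positions, int[] payloadLengths) {
    List<Integer> out = new ArrayList<>();
    int lastPosition = 0;
    int lastPayloadLength = -1;          // forces the first length to be written
    for (int i = 0; i < positions.length; i++) {
      int delta = positions[i] - lastPosition;
      lastPosition = positions[i];
      if (payloadLengths[i] != lastPayloadLength) {
        out.add((delta << 1) | 1);       // low bit set: "payload length follows"
        out.add(payloadLengths[i]);
        lastPayloadLength = payloadLengths[i];
      } else {
        out.add(delta << 1);             // low bit clear: reuse previous length
      }
    }
    return out;
  }

  static void decode(List<Integer> in, int freq) {
    int position = 0;
    int payloadLength = 0;
    for (int i = 0, read = 0; read < freq; read++) {
      int code = in.get(i++);
      if ((code & 1) != 0) {             // length changed: read it
        payloadLength = in.get(i++);
      }
      position += code >>> 1;
      System.out.println("position=" + position + " payloadLength=" + payloadLength);
    }
  }

  public static void main(String[] args) {
    int[] positions      = {3, 7, 8, 15};
    int[] payloadLengths = {2, 2, 5, 5};
    List<Integer> stream = encode(positions, payloadLengths);
    System.out.println(stream);          // [7, 2, 8, 3, 5, 14]
    decode(stream, positions.length);    // prints the original positions/lengths
  }
}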
- assert state.docFreq > 0; - assert state.docFreq == df; - - state.docIndex = docOut.index(); - state.docIndex.copyFrom(docIndex, false); - if (indexOptions != IndexOptions.DOCS_ONLY) { - state.freqIndex = freqOut.index(); - state.freqIndex.copyFrom(freqIndex, false); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - state.posIndex = posOut.index(); - state.posIndex.copyFrom(posIndex, false); - } else { - state.posIndex = null; - } - } else { - state.freqIndex = null; - state.posIndex = null; - } - - if (df >= skipMinimum) { - state.skipFP = skipOut.getFilePointer(); - //System.out.println(" skipFP=" + skipFP); - skipListWriter.writeSkip(skipOut); - //System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP)); - } else { - state.skipFP = -1; - } - state.payloadFP = payloadStart; - - lastDocID = 0; - df = 0; - } - - @Override - public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { - SepTermState state = (SepTermState)_state; - if (absolute) { - lastSkipFP = 0; - lastPayloadFP = 0; - lastState = state; - } - lastState.docIndex.copyFrom(state.docIndex, false); - lastState.docIndex.write(out, absolute); - if (indexOptions != IndexOptions.DOCS_ONLY) { - lastState.freqIndex.copyFrom(state.freqIndex, false); - lastState.freqIndex.write(out, absolute); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - lastState.posIndex.copyFrom(state.posIndex, false); - lastState.posIndex.write(out, absolute); - if (writePayloads) { - if (absolute) { - out.writeVLong(state.payloadFP); - } else { - out.writeVLong(state.payloadFP - lastPayloadFP); - } - lastPayloadFP = state.payloadFP; - } - } - } - if (state.skipFP != -1) { - if (absolute) { - out.writeVLong(state.skipFP); - } else { - out.writeVLong(state.skipFP - lastSkipFP); - } - lastSkipFP = state.skipFP; - } - } - - @Override - public void close() throws IOException { - IOUtils.close(docOut, skipOut, freqOut, posOut, payloadOut); - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListReader.java deleted file mode 100644 index e1f8b289880..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListReader.java +++ /dev/null @@ -1,209 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.codecs.MultiLevelSkipListReader; -import org.apache.lucene.index.FieldInfo.IndexOptions; - -/** - * Implements the skip list reader for the default posting list format - * that stores positions and payloads. - * - * @lucene.experimental - */ - -// TODO: rewrite this as recursive classes? -class SepSkipListReader extends MultiLevelSkipListReader { - private boolean currentFieldStoresPayloads; - private IntIndexInput.Index freqIndex[]; - private IntIndexInput.Index docIndex[]; - private IntIndexInput.Index posIndex[]; - private long payloadPointer[]; - private int payloadLength[]; - - private final IntIndexInput.Index lastFreqIndex; - private final IntIndexInput.Index lastDocIndex; - // TODO: -- make private again - final IntIndexInput.Index lastPosIndex; - - private long lastPayloadPointer; - private int lastPayloadLength; - - SepSkipListReader(IndexInput skipStream, - IntIndexInput freqIn, - IntIndexInput docIn, - IntIndexInput posIn, - int maxSkipLevels, - int skipInterval) - throws IOException { - super(skipStream, maxSkipLevels, skipInterval); - if (freqIn != null) { - freqIndex = new IntIndexInput.Index[maxSkipLevels]; - } - docIndex = new IntIndexInput.Index[maxSkipLevels]; - if (posIn != null) { - posIndex = new IntIndexInput.Index[maxNumberOfSkipLevels]; - } - for(int i=0;i 0) { - if (freqIndex != null) { - freqIndex[level-1].copyFrom(freqIndex[level]); - } - docIndex[level-1].copyFrom(docIndex[level]); - if (posIndex != null) { - posIndex[level-1].copyFrom(posIndex[level]); - } - } - } - - IntIndexInput.Index getFreqIndex() { - return lastFreqIndex; - } - - IntIndexInput.Index getPosIndex() { - return lastPosIndex; - } - - IntIndexInput.Index getDocIndex() { - return lastDocIndex; - } - - @Override - protected int readSkipData(int level, IndexInput skipStream) throws IOException { - int delta; - assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads; - if (currentFieldStoresPayloads) { - // the current field stores payloads. - // if the doc delta is odd then we have - // to read the current payload length - // because it differs from the length of the - // previous payload - delta = skipStream.readVInt(); - if ((delta & 1) != 0) { - payloadLength[level] = skipStream.readVInt(); - } - delta >>>= 1; - } else { - delta = skipStream.readVInt(); - } - if (indexOptions != IndexOptions.DOCS_ONLY) { - freqIndex[level].read(skipStream, false); - } - docIndex[level].read(skipStream, false); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - posIndex[level].read(skipStream, false); - if (currentFieldStoresPayloads) { - payloadPointer[level] += skipStream.readVInt(); - } - } - - return delta; - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java deleted file mode 100644 index fd284bd84fb..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java +++ /dev/null @@ -1,200 +0,0 @@ -package org.apache.lucene.codecs.sep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.codecs.MultiLevelSkipListWriter; -import org.apache.lucene.index.FieldInfo.IndexOptions; - -// TODO: -- skip data should somehow be more local to the -// particular stream (doc, freq, pos, payload) - -/** - * Implements the skip list writer for the default posting list format - * that stores positions and payloads. - * - * @lucene.experimental - */ -class SepSkipListWriter extends MultiLevelSkipListWriter { - private int[] lastSkipDoc; - private int[] lastSkipPayloadLength; - private long[] lastSkipPayloadPointer; - - private IntIndexOutput.Index[] docIndex; - private IntIndexOutput.Index[] freqIndex; - private IntIndexOutput.Index[] posIndex; - - private IntIndexOutput freqOutput; - // TODO: -- private again - IntIndexOutput posOutput; - // TODO: -- private again - IndexOutput payloadOutput; - - private int curDoc; - private boolean curStorePayloads; - private int curPayloadLength; - private long curPayloadPointer; - - SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, - IntIndexOutput freqOutput, - IntIndexOutput docOutput, - IntIndexOutput posOutput, - IndexOutput payloadOutput) - throws IOException { - super(skipInterval, numberOfSkipLevels, docCount); - - this.freqOutput = freqOutput; - this.posOutput = posOutput; - this.payloadOutput = payloadOutput; - - lastSkipDoc = new int[numberOfSkipLevels]; - lastSkipPayloadLength = new int[numberOfSkipLevels]; - // TODO: -- also cutover normal IndexOutput to use getIndex()? - lastSkipPayloadPointer = new long[numberOfSkipLevels]; - - freqIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - docIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - posIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - - for(int i=0;i DocSkip, FreqSkip, ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // DocSkip records the document number before every SkipInterval th document in TermFreqs. - // Document numbers are represented as differences from the previous value in the sequence. - // Case 2: current field stores payloads - // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // PayloadLength --> VInt - // In this case DocSkip/2 is the difference between - // the current and the previous value. 
If DocSkip - // is odd, then a PayloadLength encoded as VInt follows, - // if DocSkip is even, then it is assumed that the - // current payload length equals the length at the previous - // skip point - - assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads; - - if (curStorePayloads) { - int delta = curDoc - lastSkipDoc[level]; - if (curPayloadLength == lastSkipPayloadLength[level]) { - // the current payload length equals the length at the previous skip point, - // so we don't store the length again - skipBuffer.writeVInt(delta << 1); - } else { - // the payload length is different from the previous one. We shift the DocSkip, - // set the lowest bit and store the current payload length as VInt. - skipBuffer.writeVInt(delta << 1 | 1); - skipBuffer.writeVInt(curPayloadLength); - lastSkipPayloadLength[level] = curPayloadLength; - } - } else { - // current field does not store payloads - skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); - } - - if (indexOptions != IndexOptions.DOCS_ONLY) { - freqIndex[level].mark(); - freqIndex[level].write(skipBuffer, false); - } - docIndex[level].mark(); - docIndex[level].write(skipBuffer, false); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - posIndex[level].mark(); - posIndex[level].write(skipBuffer, false); - if (curStorePayloads) { - skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level])); - } - } - - lastSkipDoc[level] = curDoc; - lastSkipPayloadPointer[level] = curPayloadPointer; - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/package.html b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/package.html deleted file mode 100644 index b51d9102715..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -Sep: base support for separate files (doc,frq,pos,skp,pyl) - - diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index 7d1798b84a5..3d69e8a583c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. 
*/ +import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.CHECKSUM; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH; @@ -30,6 +31,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.ParseException; @@ -47,6 +49,8 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -225,7 +229,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer { assert StringHelper.startsWith(scratch, LENGTH); int len; try { - len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue(); + len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue(); } catch (ParseException pe) { CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")"); e.initCause(pe); @@ -257,7 +261,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer { assert StringHelper.startsWith(scratch, LENGTH); int len; try { - len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue(); + len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue(); } catch (ParseException pe) { CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")"); e.initCause(pe); @@ -326,7 +330,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer { assert StringHelper.startsWith(scratch, LENGTH): "got " + scratch.utf8ToString() + " in=" + in; int len; try { - len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue(); + len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue(); } catch (ParseException pe) { CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")"); e.initCause(pe); @@ -404,7 +408,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer { assert StringHelper.startsWith(scratch, LENGTH): "got " + scratch.utf8ToString() + " in=" + in; int len; try { - len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue(); + len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue(); } catch (ParseException pe) { CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")"); e.initCause(pe); @@ -460,11 +464,26 @@ class 
SimpleTextDocValuesReader extends DocValuesProducer { /** Used only in ctor: */ private String stripPrefix(BytesRef prefix) throws IOException { - return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, "UTF-8"); + return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, StandardCharsets.UTF_8); } @Override public long ramBytesUsed() { return 0; } + + @Override + public void checkIntegrity() throws IOException { + BytesRef scratch = new BytesRef(); + IndexInput clone = data.clone(); + clone.seek(0); + ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); + while(true) { + SimpleTextUtil.readLine(input, scratch); + if (scratch.equals(END)) { + SimpleTextUtil.checkFooter(input, CHECKSUM); + break; + } + } + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java index 70ad8973c53..122907d7320 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; class SimpleTextDocValuesWriter extends DocValuesConsumer { + final static BytesRef CHECKSUM = new BytesRef("checksum "); final static BytesRef END = new BytesRef("END"); final static BytesRef FIELD = new BytesRef("field "); final static BytesRef TYPE = new BytesRef(" type "); @@ -49,7 +50,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { final static BytesRef NUMVALUES = new BytesRef(" numvalues "); final static BytesRef ORDPATTERN = new BytesRef(" ordpattern "); - final IndexOutput data; + IndexOutput data; final BytesRef scratch = new BytesRef(); final int numDocs; private final Set fieldsSeen = new HashSet<>(); // for asserting @@ -389,18 +390,25 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { @Override public void close() throws IOException { - boolean success = false; - try { - assert !fieldsSeen.isEmpty(); - // TODO: sheisty to do this here? - SimpleTextUtil.write(data, END); - SimpleTextUtil.writeNewline(data); - success = true; - } finally { - if (success) { - IOUtils.close(data); - } else { - IOUtils.closeWhileHandlingException(data); + if (data != null) { + boolean success = false; + try { + assert !fieldsSeen.isEmpty(); + // TODO: sheisty to do this here? 
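The SimpleText reader/writer changes in this patch all follow one footer scheme: the writer finishes its normal output, then appends a line of the form "checksum <value>", where the value is the CRC-32 of everything written before that line; on open (or in checkIntegrity), the reader re-reads the file through a checksumming input and compares its own value against the recorded one. A minimal sketch of that idea, assuming in-memory strings in place of IndexOutput/ChecksumIndexInput; the names below are illustrative, not the actual SimpleTextUtil API.

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

// Illustrative sketch only of the "checksum <crc32>" footer convention added above.
public class SimpleTextChecksumSketch {

  static String writeWithFooter(String body) {
    CRC32 crc = new CRC32();
    crc.update(body.getBytes(StandardCharsets.UTF_8));
    return body + "checksum " + crc.getValue() + "\n";
  }

  static void checkFooter(String file) {
    int footerStart = file.lastIndexOf("checksum ");
    String body = file.substring(0, footerStart);
    String recorded = file.substring(footerStart + "checksum ".length()).trim();

    CRC32 crc = new CRC32();
    crc.update(body.getBytes(StandardCharsets.UTF_8));
    String expected = Long.toString(crc.getValue());

    if (!expected.equals(recorded)) {
      throw new RuntimeException("SimpleText checksum failure: " + recorded + " != " + expected);
    }
  }

  public static void main(String[] args) {
    String file = writeWithFooter("field title\n  type string\ndoc 0\nEND\n");
    checkFooter(file);                                 // passes: body matches footer
    try {
      checkFooter(file.replace("doc 0", "doc 1"));     // body changed, footer did not
    } catch (RuntimeException expected) {
      System.out.println(expected.getMessage());
    }
  }
}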
+ SimpleTextUtil.write(data, END); + SimpleTextUtil.writeNewline(data); + String checksum = Long.toString(data.getChecksum()); + SimpleTextUtil.write(data, CHECKSUM); + SimpleTextUtil.write(data, checksum, scratch); + SimpleTextUtil.writeNewline(data); + success = true; + } finally { + if (success) { + IOUtils.close(data); + } else { + IOUtils.closeWhileHandlingException(data); + } + data = null; } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java index 79206a79a3c..56efd251dbf 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java @@ -18,20 +18,20 @@ package org.apache.lucene.codecs.simpletext; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.apache.lucene.codecs.FieldInfosReader; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; @@ -49,7 +49,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader { @Override public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION); - IndexInput input = directory.openInput(fileName, iocontext); + ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext); BytesRef scratch = new BytesRef(); boolean success = false; @@ -129,9 +129,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader { infos[i].setDocValuesGen(dvGen); } - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + SimpleTextUtil.checkFooter(input, CHECKSUM); FieldInfos fieldInfos = new FieldInfos(infos); success = true; @@ -154,6 +152,6 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader { } private String readString(int offset, BytesRef scratch) { - return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); + return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, StandardCharsets.UTF_8); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java index d9b50fcb89f..8f940533f76 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java @@ -58,6 +58,7 @@ public class 
SimpleTextFieldInfosWriter extends FieldInfosWriter { static final BytesRef NUM_ATTS = new BytesRef(" attributes "); final static BytesRef ATT_KEY = new BytesRef(" key "); final static BytesRef ATT_VALUE = new BytesRef(" value "); + final static BytesRef CHECKSUM = new BytesRef("checksum "); @Override public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { @@ -132,6 +133,10 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter { } } } + String checksum = Long.toString(out.getChecksum()); + SimpleTextUtil.write(out, CHECKSUM); + SimpleTextUtil.write(out, checksum, scratch); + SimpleTextUtil.writeNewline(out); success = true; } finally { if (success) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 17b6014de9a..2eff7adea10 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -33,6 +34,8 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; @@ -50,22 +53,23 @@ import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.Util; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.CHECKSUM; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET; +import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD; + class SimpleTextFieldsReader extends FieldsProducer { private final TreeMap fields; private final IndexInput in; private final FieldInfos fieldInfos; private final int maxDoc; - final static BytesRef END = SimpleTextFieldsWriter.END; - final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD; - final static BytesRef TERM = SimpleTextFieldsWriter.TERM; - final static BytesRef DOC = SimpleTextFieldsWriter.DOC; - final static BytesRef FREQ = SimpleTextFieldsWriter.FREQ; - final static BytesRef POS = SimpleTextFieldsWriter.POS; - final static BytesRef START_OFFSET = SimpleTextFieldsWriter.START_OFFSET; - final static BytesRef END_OFFSET = SimpleTextFieldsWriter.END_OFFSET; - final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD; - public 
SimpleTextFieldsReader(SegmentReadState state) throws IOException { this.maxDoc = state.segmentInfo.getDocCount(); fieldInfos = state.fieldInfos; @@ -82,16 +86,18 @@ class SimpleTextFieldsReader extends FieldsProducer { } private TreeMap readFields(IndexInput in) throws IOException { + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); BytesRef scratch = new BytesRef(10); TreeMap fields = new TreeMap<>(); while (true) { - SimpleTextUtil.readLine(in, scratch); + SimpleTextUtil.readLine(input, scratch); if (scratch.equals(END)) { + SimpleTextUtil.checkFooter(input, CHECKSUM); return fields; } else if (StringHelper.startsWith(scratch, FIELD)) { - String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"); - fields.put(fieldName, in.getFilePointer()); + String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, StandardCharsets.UTF_8); + fields.put(fieldName, input.getFilePointer()); } } } @@ -668,4 +674,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException {} } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java index 2c30d0e6ad7..29872a9d99a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java @@ -35,10 +35,11 @@ import org.apache.lucene.util.IOUtils; class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable { - private final IndexOutput out; + private IndexOutput out; private final BytesRef scratch = new BytesRef(10); private final SegmentWriteState writeState; + final static BytesRef CHECKSUM = new BytesRef("checksum "); final static BytesRef END = new BytesRef("END"); final static BytesRef FIELD = new BytesRef("field "); final static BytesRef TERM = new BytesRef(" term "); @@ -215,11 +216,18 @@ class SimpleTextFieldsWriter extends FieldsConsumer implements Closeable { @Override public void close() throws IOException { - try { - write(END); - newline(); - } finally { - out.close(); + if (out != null) { + try { + write(END); + newline(); + String checksum = Long.toString(out.getChecksum()); + write(CHECKSUM); + write(checksum); + newline(); + } finally { + out.close(); + out = null; + } } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index 54fbdbb19d3..73afc191e10 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -24,9 +24,9 @@ import java.util.Collection; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; @@ -50,6 +50,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { final static BytesRef SIZE = 
new BytesRef("size "); final static BytesRef DOC = new BytesRef(" doc "); final static BytesRef END = new BytesRef("END"); + final static BytesRef CHECKSUM = new BytesRef("checksum "); @Override public MutableBits newLiveDocs(int size) throws IOException { @@ -69,10 +70,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { CharsRef scratchUTF16 = new CharsRef(); String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen()); - IndexInput in = null; + ChecksumIndexInput in = null; boolean success = false; try { - in = dir.openInput(fileName, context); + in = dir.openChecksumInput(fileName, context); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch, SIZE); @@ -88,6 +89,8 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { SimpleTextUtil.readLine(in, scratch); } + SimpleTextUtil.checkFooter(in, CHECKSUM); + success = true; return new SimpleTextBits(bits, size); } finally { @@ -127,6 +130,10 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { SimpleTextUtil.write(out, END); SimpleTextUtil.writeNewline(out); + String checksum = Long.toString(out.getChecksum()); + SimpleTextUtil.write(out, CHECKSUM); + SimpleTextUtil.write(out, checksum, scratch); + SimpleTextUtil.writeNewline(out); success = true; } finally { if (success) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java index e117155a77d..961559493b2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. 
*/ +import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_CHECKSUM; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT; @@ -27,6 +28,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -35,9 +37,9 @@ import java.util.Set; import org.apache.lucene.codecs.SegmentInfoReader; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; @@ -54,7 +56,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader { public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException { BytesRef scratch = new BytesRef(); String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); - IndexInput input = directory.openInput(segFileName, context); + ChecksumIndexInput input = directory.openChecksumInput(segFileName, context); boolean success = false; try { SimpleTextUtil.readLine(input, scratch); @@ -96,6 +98,8 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader { String fileName = readString(SI_FILE.length, scratch); files.add(fileName); } + + SimpleTextUtil.checkFooter(input, SI_CHECKSUM); SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics); @@ -112,6 +116,6 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader { } private String readString(int offset, BytesRef scratch) { - return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); + return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, StandardCharsets.UTF_8); } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java index 513d9230388..39dc1e9a475 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java @@ -47,6 +47,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter { final static BytesRef SI_DIAG_VALUE = new BytesRef(" value "); final static BytesRef SI_NUM_FILES = new BytesRef(" files "); final static BytesRef SI_FILE = new BytesRef(" file "); + final static BytesRef SI_CHECKSUM = new BytesRef(" checksum "); @Override public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException { @@ -55,7 +56,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter { si.addFile(segFileName); boolean success = false; - IndexOutput output = dir.createOutput(segFileName, ioContext); + IndexOutput output = 
dir.createOutput(segFileName, ioContext); try { BytesRef scratch = new BytesRef(); @@ -103,6 +104,11 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter { SimpleTextUtil.writeNewline(output); } } + + String checksum = Long.toString(output.getChecksum()); + SimpleTextUtil.write(output, SI_CHECKSUM); + SimpleTextUtil.write(output, checksum, scratch); + SimpleTextUtil.writeNewline(output); success = true; } finally { if (!success) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java index ea31336afa4..8d266df0c30 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.index.FieldInfo; @@ -26,6 +27,8 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -78,15 +81,17 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { // stored fields file in entirety up-front and save the offsets // so we can seek to the documents later. private void readIndex(int size) throws IOException { + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); offsets = new long[size]; int upto = 0; while (!scratch.equals(END)) { - readLine(); + SimpleTextUtil.readLine(input, scratch); if (StringHelper.startsWith(scratch, DOC)) { - offsets[upto] = in.getFilePointer(); + offsets[upto] = input.getFilePointer(); upto++; } } + SimpleTextUtil.checkFooter(input, CHECKSUM); assert upto == offsets.length; } @@ -141,7 +146,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { readLine(); assert StringHelper.startsWith(scratch, VALUE); if (type == TYPE_STRING) { - visitor.stringField(fieldInfo, new String(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, "UTF-8")); + visitor.stringField(fieldInfo, new String(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, StandardCharsets.UTF_8)); } else if (type == TYPE_BINARY) { byte[] copy = new byte[scratch.length-VALUE.length]; System.arraycopy(scratch.bytes, scratch.offset+VALUE.length, copy, 0, copy.length); @@ -188,6 +193,11 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length); } + private String readString(int offset, BytesRef scratch) { + UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16); + return scratchUTF16.toString(); + } + private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) { return a.length == b.length - bOffset && ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset); @@ -197,4 +207,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { public long ramBytesUsed() { return 0; } + + 
@Override + public void checkIntegrity() throws IOException {} } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java index 1e97ebd8cee..daa90dad771 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java @@ -51,13 +51,14 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter { final static BytesRef TYPE_FLOAT = new BytesRef("float"); final static BytesRef TYPE_DOUBLE = new BytesRef("double"); - final static BytesRef END = new BytesRef("END"); - final static BytesRef DOC = new BytesRef("doc "); - final static BytesRef NUM = new BytesRef(" numfields "); - final static BytesRef FIELD = new BytesRef(" field "); - final static BytesRef NAME = new BytesRef(" name "); - final static BytesRef TYPE = new BytesRef(" type "); - final static BytesRef VALUE = new BytesRef(" value "); + final static BytesRef CHECKSUM = new BytesRef("checksum "); + final static BytesRef END = new BytesRef("END"); + final static BytesRef DOC = new BytesRef("doc "); + final static BytesRef NUM = new BytesRef(" numfields "); + final static BytesRef FIELD = new BytesRef(" field "); + final static BytesRef NAME = new BytesRef(" name "); + final static BytesRef TYPE = new BytesRef(" type "); + final static BytesRef VALUE = new BytesRef(" value "); private final BytesRef scratch = new BytesRef(); @@ -171,6 +172,10 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter { } write(END); newLine(); + String checksum = Long.toString(out.getChecksum()); + write(CHECKSUM); + write(checksum); + newLine(); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index c9e9c9e7b4d..5ac5d11113d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -33,6 +33,8 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -82,15 +84,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { // vectors file in entirety up-front and save the offsets // so we can seek to the data later. 
private void readIndex(int maxDoc) throws IOException { + ChecksumIndexInput input = new BufferedChecksumIndexInput(in); offsets = new long[maxDoc]; int upto = 0; while (!scratch.equals(END)) { - readLine(); + SimpleTextUtil.readLine(input, scratch); if (StringHelper.startsWith(scratch, DOC)) { - offsets[upto] = in.getFilePointer(); + offsets[upto] = input.getFilePointer(); upto++; } } + SimpleTextUtil.checkFooter(input, CHECKSUM); assert upto == offsets.length; } @@ -537,4 +541,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { public long ramBytesUsed() { return 0; } + + @Override + public void checkIntegrity() throws IOException {} } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java index 04dd5523121..84325e5d5ef 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java @@ -37,6 +37,7 @@ import org.apache.lucene.util.IOUtils; */ public class SimpleTextTermVectorsWriter extends TermVectorsWriter { + static final BytesRef CHECKSUM = new BytesRef("checksum "); static final BytesRef END = new BytesRef("END"); static final BytesRef DOC = new BytesRef("doc "); static final BytesRef NUMFIELDS = new BytesRef(" numfields "); @@ -177,6 +178,10 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter { } write(END); newLine(); + String checksum = Long.toString(out.getChecksum()); + write(CHECKSUM); + write(checksum); + newLine(); } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java index c0c7787e18a..9a2baaa5103 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java @@ -17,11 +17,16 @@ package org.apache.lucene.codecs.simpletext; * limitations under the License. */ +import static org.apache.lucene.codecs.simpletext.SimpleTextStoredFieldsWriter.CHECKSUM; + import java.io.IOException; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.UnicodeUtil; class SimpleTextUtil { @@ -67,4 +72,18 @@ class SimpleTextUtil { scratch.offset = 0; scratch.length = upto; } + + public static void checkFooter(ChecksumIndexInput input, BytesRef prefix) throws IOException { + BytesRef scratch = new BytesRef(); + String expectedChecksum = Long.toString(input.getChecksum()); + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch, prefix); + String actualChecksum = new BytesRef(scratch.bytes, prefix.length, scratch.length - prefix.length).utf8ToString(); + if (!expectedChecksum.equals(actualChecksum)) { + throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum + " (resource=" + input + ")"); + } + if (input.length() != input.getFilePointer()) { + throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! 
(resource=" + input + ")"); + } + } } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestIntBlockCodec.java b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestIntBlockCodec.java deleted file mode 100644 index c85662abeb0..00000000000 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestIntBlockCodec.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.apache.lucene.codecs.intblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.store.*; -import org.apache.lucene.codecs.mockintblock.*; -import org.apache.lucene.codecs.sep.*; - -public class TestIntBlockCodec extends LuceneTestCase { - - public void testSimpleIntBlocks() throws Exception { - Directory dir = newDirectory(); - - IntStreamFactory f = new MockFixedIntBlockPostingsFormat(128).getIntFactory(); - - IntIndexOutput out = f.createOutput(dir, "test", newIOContext(random())); - for(int i=0;i<11777;i++) { - out.write(i); - } - out.close(); - - IntIndexInput in = f.openInput(dir, "test", newIOContext(random())); - IntIndexInput.Reader r = in.reader(); - - for(int i=0;i<11777;i++) { - assertEquals(i, r.next()); - } - in.close(); - - dir.close(); - } - - public void testEmptySimpleIntBlocks() throws Exception { - Directory dir = newDirectory(); - - IntStreamFactory f = new MockFixedIntBlockPostingsFormat(128).getIntFactory(); - IntIndexOutput out = f.createOutput(dir, "test", newIOContext(random())); - - // write no ints - out.close(); - - IntIndexInput in = f.openInput(dir, "test", newIOContext(random())); - in.reader(); - // read no ints - in.close(); - dir.close(); - } -} diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java similarity index 78% rename from lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java rename to lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java index 2bf392ce131..8a7ad9f2559 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.intblock; +package org.apache.lucene.codecs.memory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,16 +18,14 @@ package org.apache.lucene.codecs.intblock; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; import org.apache.lucene.util.TestUtil; /** - * Basic 
tests for FixedIntBlock + * Tests FSTOrdPostingsFormat */ -public class TestFixedIntBlockPostingsFormat extends BasePostingsFormatTestCase { - // TODO: randomize blocksize - private final Codec codec = TestUtil.alwaysPostingsFormat(new MockFixedIntBlockPostingsFormat()); +public class TestFSTOrdPostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPostingsFormat()); @Override protected Codec getCodec() { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPulsing41PostingsFormat.java similarity index 75% rename from lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java rename to lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPulsing41PostingsFormat.java index 201537dbc63..362e7ec802a 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPulsing41PostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.intblock; +package org.apache.lucene.codecs.memory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,17 +18,14 @@ package org.apache.lucene.codecs.intblock; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.TestUtil; /** - * Basic tests for VariableIntBlock + * Tests FSTOrdPulsing41PostingsFormat */ -public class TestVariableIntBlockPostingsFormat extends BasePostingsFormatTestCase { - // TODO: randomize blocksize - private final Codec codec = TestUtil.alwaysPostingsFormat(new MockVariableIntBlockPostingsFormat()); +public class TestFSTOrdPulsing41PostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPulsing41PostingsFormat()); @Override protected Codec getCodec() { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPostingsFormat.java similarity index 82% rename from lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java rename to lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPostingsFormat.java index e49e1894a03..043968226da 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.sep; +package org.apache.lucene.codecs.memory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,16 +18,14 @@ package org.apache.lucene.codecs.sep; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; import org.apache.lucene.util.TestUtil; /** - * Tests sep layout + * Tests FSTPostingsFormat */ -public class TestSepPostingsFormat extends BasePostingsFormatTestCase { - // TODO: randomize cutoff - private final Codec codec = TestUtil.alwaysPostingsFormat(new MockSepPostingsFormat()); +public class TestFSTPostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec 
= TestUtil.alwaysPostingsFormat(new FSTPostingsFormat()); @Override protected Codec getCodec() { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPulsing41PostingsFormat.java similarity index 55% rename from lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java rename to lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPulsing41PostingsFormat.java index eace0335a8e..751b157d4bc 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntStreamFactory.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTPulsing41PostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.sep; +package org.apache.lucene.codecs.memory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,20 +17,18 @@ package org.apache.lucene.codecs.sep; * limitations under the License. */ -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BasePostingsFormatTestCase; +import org.apache.lucene.util.TestUtil; -import java.io.IOException; +/** + * Tests FSTPulsing41PostingsFormat + */ +public class TestFSTPulsing41PostingsFormat extends BasePostingsFormatTestCase { + private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTPulsing41PostingsFormat()); -/** Provides int reader and writer to specified files. - * - * @lucene.experimental */ -public abstract class IntStreamFactory { - /** Create an {@link IntIndexInput} on the provided - * fileName. */ - public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException; - - /** Create an {@link IntIndexOutput} on the provided - * fileName. */ - public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException; + @Override + protected Codec getCodec() { + return codec; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index 396315bb492..624322ad47a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -131,6 +131,11 @@ public class BlockTreeTermsReader extends FieldsProducer { if (indexVersion != version) { throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion); } + + // verify + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(indexIn); + } // Have PostingsReader init itself postingsReader.init(in); @@ -157,7 +162,7 @@ public class BlockTreeTermsReader extends FieldsProducer { final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong(); final long sumDocFreq = in.readVLong(); final int docCount = in.readVInt(); - final int longsSize = version >= BlockTreeTermsWriter.TERMS_VERSION_META_ARRAY ? in.readVInt() : 0; + final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? 
in.readVInt() : 0; if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")"); } @@ -187,9 +192,9 @@ public class BlockTreeTermsReader extends FieldsProducer { /** Reads terms file header. */ private int readHeader(IndexInput input) throws IOException { int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME, - BlockTreeTermsWriter.TERMS_VERSION_START, - BlockTreeTermsWriter.TERMS_VERSION_CURRENT); - if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) { + BlockTreeTermsWriter.VERSION_START, + BlockTreeTermsWriter.VERSION_CURRENT); + if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) { dirOffset = input.readLong(); } return version; @@ -198,9 +203,9 @@ public class BlockTreeTermsReader extends FieldsProducer { /** Reads index file header. */ private int readIndexHeader(IndexInput input) throws IOException { int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME, - BlockTreeTermsWriter.TERMS_INDEX_VERSION_START, - BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT); - if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) { + BlockTreeTermsWriter.VERSION_START, + BlockTreeTermsWriter.VERSION_CURRENT); + if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) { indexDirOffset = input.readLong(); } return version; @@ -209,7 +214,10 @@ public class BlockTreeTermsReader extends FieldsProducer { /** Seek {@code input} to the directory offset. */ private void seekDir(IndexInput input, long dirOffset) throws IOException { - if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) { + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { + input.seek(input.length() - CodecUtil.footerLength() - 8); + dirOffset = input.readLong(); + } else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) { input.seek(input.length() - 8); dirOffset = input.readLong(); } @@ -391,7 +399,7 @@ public class BlockTreeTermsReader extends FieldsProducer { final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); PrintStream out; try { - out = new PrintStream(bos, false, "UTF-8"); + out = new PrintStream(bos, false, IOUtils.UTF_8); } catch (UnsupportedEncodingException bogus) { throw new RuntimeException(bogus); } @@ -428,7 +436,7 @@ public class BlockTreeTermsReader extends FieldsProducer { } try { - return bos.toString("UTF-8"); + return bos.toString(IOUtils.UTF_8); } catch (UnsupportedEncodingException bogus) { throw new RuntimeException(bogus); } @@ -2977,4 +2985,15 @@ public class BlockTreeTermsReader extends FieldsProducer { } return sizeInByes; } + + @Override + public void checkIntegrity() throws IOException { + if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) { + // term dictionary + CodecUtil.checksumEntireFile(in); + + // postings + postingsReader.checkIntegrity(); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java index f363ba0052c..6320ec9a979 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java @@ -109,7 +109,7 @@ import org.apache.lucene.util.packed.PackedInts; * *

  *    • TermsDict (.tim) --> Header, PostingsHeader, NodeBlockNumBlocks,
- *             FieldSummary, DirOffset
+ *             FieldSummary, DirOffset, Footer
  *    • NodeBlock --> (OuterNode | InnerNode)
  *    • OuterNode --> EntryCount, SuffixLength, ByteSuffixLength, StatsLength, < TermStats >EntryCount, MetaLength, <TermMetadata>EntryCount
  *    • InnerNode --> EntryCount, SuffixLength[,Sub?], ByteSuffixLength, StatsLength, < TermStats ? >EntryCount, MetaLength, <TermMetadata ? >EntryCount
@@ -122,6 +122,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *    • FieldNumber,RootCodeLength,DocCount --> {@link DataOutput#writeVInt VInt}
  *    • TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --> {@link DataOutput#writeVLong VLong}
+ *    • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
@@ -150,12 +151,13 @@ import org.apache.lucene.util.packed.PackedInts;
  *   when a given term cannot exist on disk (in the .tim file), saving a disk seek.
  *
  *    • TermsIndex (.tip) --> Header, FSTIndexNumFields
- *             <IndexStartFP>NumFields, DirOffset
+ *             <IndexStartFP>NumFields, DirOffset, Footer
  *    • Header --> {@link CodecUtil#writeHeader CodecHeader}
  *    • DirOffset --> {@link DataOutput#writeLong Uint64}
  *    • IndexStartFP --> {@link DataOutput#writeVLong VLong}
  *    • FSTIndex --> {@link FST FST<byte[]>}
+ *    • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
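With a footer now trailing each file, the directory offset the writer records at close is no longer the last 8 bytes of the .tim/.tip file; the seekDir hunk in BlockTreeTermsReader above therefore backs up past the footer first. A minimal standalone sketch of that arithmetic, using plain java.io rather than Lucene's IndexInput (the class and method names are illustrative only):

    import java.io.IOException;
    import java.io.RandomAccessFile;

    class DirOffsetSketch {
      static final int FOOTER_LENGTH = 16; // magic (4) + algorithm id (4) + checksum (8)

      // VERSION_CHECKSUM files: the dir offset sits immediately before the footer.
      // Older append-only files simply keep it in the last 8 bytes.
      static long readDirOffset(RandomAccessFile file, boolean hasFooter) throws IOException {
        long skip = hasFooter ? FOOTER_LENGTH + 8 : 8;
        file.seek(file.length() - skip);
        return file.readLong();
      }
    }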
        @@ -178,7 +180,6 @@ import org.apache.lucene.util.packed.PackedInts; * @see BlockTreeTermsReader * @lucene.experimental */ - public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable { /** Suggested default value for the {@code @@ -204,33 +205,24 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable { final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT"; /** Initial terms format. */ - public static final int TERMS_VERSION_START = 0; + public static final int VERSION_START = 0; /** Append-only */ - public static final int TERMS_VERSION_APPEND_ONLY = 1; + public static final int VERSION_APPEND_ONLY = 1; /** Meta data as array */ - public static final int TERMS_VERSION_META_ARRAY = 2; + public static final int VERSION_META_ARRAY = 2; + + /** checksums */ + public static final int VERSION_CHECKSUM = 3; /** Current terms format. */ - public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_META_ARRAY; + public static final int VERSION_CURRENT = VERSION_CHECKSUM; /** Extension of terms index file */ static final String TERMS_INDEX_EXTENSION = "tip"; final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX"; - /** Initial index format. */ - public static final int TERMS_INDEX_VERSION_START = 0; - - /** Append-only */ - public static final int TERMS_INDEX_VERSION_APPEND_ONLY = 1; - - /** Meta data as array */ - public static final int TERMS_INDEX_VERSION_META_ARRAY = 2; - - /** Current index format. */ - public static final int TERMS_INDEX_VERSION_CURRENT = TERMS_INDEX_VERSION_META_ARRAY; - private final IndexOutput out; private final IndexOutput indexOut; final int maxDoc; @@ -326,12 +318,12 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable { /** Writes the terms file header. */ private void writeHeader(IndexOutput out) throws IOException { - CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT); + CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT); } /** Writes the index file header. */ private void writeIndexHeader(IndexOutput out) throws IOException { - CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT); + CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT); } /** Writes the terms file trailer. 
*/ @@ -1139,13 +1131,13 @@ public class BlockTreeTermsWriter extends FieldsConsumer implements Closeable { } out.writeVLong(field.sumDocFreq); out.writeVInt(field.docCount); - if (TERMS_VERSION_CURRENT >= TERMS_VERSION_META_ARRAY) { - out.writeVInt(field.longsSize); - } + out.writeVInt(field.longsSize); indexOut.writeVLong(field.indexStartFP); } writeTrailer(out, dirStart); + CodecUtil.writeFooter(out); writeIndexTrailer(indexOut, indexDirStart); + CodecUtil.writeFooter(indexOut); } catch (IOException ioe2) { ioe = ioe2; } finally { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java index c9c562e2425..110ff575a58 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java @@ -23,8 +23,12 @@ import java.io.IOException; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.store.BufferedChecksumIndexInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; /** @@ -43,6 +47,10 @@ public final class CodecUtil { * Constant to identify the start of a codec header. */ public final static int CODEC_MAGIC = 0x3fd76c17; + /** + * Constant to identify the start of a codec footer. + */ + public final static int FOOTER_MAGIC = ~CODEC_MAGIC; /** * Writes a codec header, which records both a string to @@ -150,4 +158,119 @@ public final class CodecUtil { return actualVersion; } + + /** + * Writes a codec footer, which records both a checksum + * algorithm ID and a checksum. This footer can + * be parsed and validated with + * {@link #checkFooter(ChecksumIndexInput) checkFooter()}. + *

+ *   CodecFooter --> Magic,AlgorithmID,Checksum
+ *    • Magic --> {@link DataOutput#writeInt Uint32}. This identifies the start of
+ *      the footer. It is always {@value #FOOTER_MAGIC}.
+ *    • AlgorithmID --> {@link DataOutput#writeInt Uint32}. This indicates the
+ *      checksum algorithm used. Currently this is always 0, for zlib-crc32.
+ *    • Checksum --> {@link DataOutput#writeLong Uint64}. The actual checksum value
+ *      for all previous bytes in the stream, including the bytes from Magic and
+ *      AlgorithmID.
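A minimal standalone sketch of those 16 footer bytes, using java.util.zip.CRC32 in place of Lucene's checksumming IndexOutput; FOOTER_MAGIC is the complement of CODEC_MAGIC (0x3fd76c17) per the constant added above, and everything else here (class name, appendFooter) is illustrative:

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.zip.CRC32;

    class FooterSketch {
      static final int FOOTER_MAGIC = ~0x3fd76c17; // ~CODEC_MAGIC

      // Appends: magic, algorithm id 0 (zlib-crc32), then the CRC-32 of every byte
      // written so far, including the magic and algorithm id themselves.
      static byte[] appendFooter(byte[] payload) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.write(payload);
        out.writeInt(FOOTER_MAGIC);
        out.writeInt(0);                  // algorithm id: 0 = zlib-crc32
        CRC32 crc = new CRC32();
        crc.update(bytes.toByteArray());  // checksum covers all preceding bytes
        out.writeLong(crc.getValue());    // file grows by exactly footerLength() = 16
        return bytes.toByteArray();
      }
    }

This is also why footerLength() can simply return the constant 16: the footer is fixed-width regardless of the file it seals.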
        + * + * @param out Output stream + * @throws IOException If there is an I/O error writing to the underlying medium. + */ + public static void writeFooter(IndexOutput out) throws IOException { + out.writeInt(FOOTER_MAGIC); + out.writeInt(0); + out.writeLong(out.getChecksum()); + } + + /** + * Computes the length of a codec footer. + * + * @return length of the entire codec footer. + * @see #writeFooter(IndexOutput) + */ + public static int footerLength() { + return 16; + } + + /** + * Validates the codec footer previously written by {@link #writeFooter}. + * @return actual checksum value + * @throws IOException if the footer is invalid, if the checksum does not match, + * or if {@code in} is not properly positioned before the footer + * at the end of the stream. + */ + public static long checkFooter(ChecksumIndexInput in) throws IOException { + validateFooter(in); + long actualChecksum = in.getChecksum(); + long expectedChecksum = in.readLong(); + if (expectedChecksum != actualChecksum) { + throw new CorruptIndexException("checksum failed (hardware problem?) : expected=" + Long.toHexString(expectedChecksum) + + " actual=" + Long.toHexString(actualChecksum) + + " (resource=" + in + ")"); + } + if (in.getFilePointer() != in.length()) { + throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); + } + return actualChecksum; + } + + /** + * Returns (but does not validate) the checksum previously written by {@link #checkFooter}. + * @return actual checksum value + * @throws IOException if the footer is invalid + */ + public static long retrieveChecksum(IndexInput in) throws IOException { + in.seek(in.length() - footerLength()); + validateFooter(in); + return in.readLong(); + } + + private static void validateFooter(IndexInput in) throws IOException { + final int magic = in.readInt(); + if (magic != FOOTER_MAGIC) { + throw new CorruptIndexException("codec footer mismatch: actual footer=" + magic + " vs expected footer=" + FOOTER_MAGIC + " (resource: " + in + ")"); + } + + final int algorithmID = in.readInt(); + if (algorithmID != 0) { + throw new CorruptIndexException("codec footer mismatch: unknown algorithmID: " + algorithmID); + } + } + + /** + * Checks that the stream is positioned at the end, and throws exception + * if it is not. + * @deprecated Use {@link #checkFooter} instead, this should only used for files without checksums + */ + @Deprecated + public static void checkEOF(IndexInput in) throws IOException { + if (in.getFilePointer() != in.length()) { + throw new CorruptIndexException("did not read all bytes from file: read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); + } + } + + /** + * Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter} + *

        + * Note that this method may be slow, as it must process the entire file. + * If you just need to extract the checksum value, call {@link #retrieveChecksum}. + */ + public static long checksumEntireFile(IndexInput input) throws IOException { + IndexInput clone = input.clone(); + clone.seek(0); + ChecksumIndexInput in = new BufferedChecksumIndexInput(clone); + assert in.getFilePointer() == 0; + final byte[] buffer = new byte[1024]; + long bytesToRead = in.length() - footerLength(); + for (long skipped = 0; skipped < bytesToRead; ) { + final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length); + in.readBytes(buffer, 0, toRead); + skipped += toRead; + } + return checkFooter(in); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java index b492a9eb185..931c08238c0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java @@ -67,6 +67,15 @@ public abstract class DocValuesProducer implements Closeable { /** Returns approximate RAM bytes used */ public abstract long ramBytesUsed(); + /** + * Checks consistency of this producer + *
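The producers changed below all follow the same version-gated pattern when asked to verify themselves: if the segment was written by a footer-aware format, hand the whole data file to checksumEntireFile, otherwise there is nothing to check. A schematic sketch (the class name, fields, and the VERSION_CHECKSUM value are illustrative, not a real producer):

    import java.io.IOException;
    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.IndexInput;

    final class ChecksumGatedProducerSketch {
      static final int VERSION_CHECKSUM = 2; // first version that wrote footers
      private final IndexInput data;         // large data file, verified on demand
      private final int version;             // parsed from the codec header at open

      ChecksumGatedProducerSketch(IndexInput data, int version) {
        this.data = data;
        this.version = version;
      }

      public void checkIntegrity() throws IOException {
        if (version >= VERSION_CHECKSUM) {
          CodecUtil.checksumEntireFile(data); // full read plus footer validation
        }
        // pre-checksum segments carry no footer, so nothing can be verified
      }
    }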

        + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; + /** * A simple implementation of {@link DocValuesProducer#getDocsWithField} that * returns {@code true} if a document has an ordinal >= 0 diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java index d81af811297..55bb142b7b2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java @@ -39,4 +39,13 @@ public abstract class FieldsProducer extends Fields implements Closeable { /** Returns approximate RAM bytes used */ public abstract long ramBytesUsed(); + + /** + * Checks consistency of this reader. + *

        + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java index d8471b970c2..39476ed3f65 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java @@ -72,6 +72,15 @@ public abstract class PostingsReaderBase implements Closeable { /** Returns approximate RAM bytes used */ public abstract long ramBytesUsed(); + /** + * Checks consistency of this reader. + *

        + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; + @Override public abstract void close() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java index 315f574afe1..2a57561d144 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java @@ -43,4 +43,13 @@ public abstract class StoredFieldsReader implements Cloneable, Closeable { /** Returns approximate RAM bytes used */ public abstract long ramBytesUsed(); + + /** + * Checks consistency of this reader. + *

        + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java index 95472cb5c2e..f1649fbad15 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java @@ -45,6 +45,15 @@ public abstract class TermVectorsReader implements Cloneable, Closeable { /** Returns approximate RAM bytes used */ public abstract long ramBytesUsed(); + /** + * Checks consistency of this reader. + *
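These hooks are per-producer; a segment-level check would simply fan out to whichever readers the segment actually has, which is roughly how a caller is expected to use the new API. A hedged caller-side sketch (the wiring and field names are invented for illustration; this is not the actual SegmentReader code):

    import java.io.IOException;
    import org.apache.lucene.codecs.DocValuesProducer;
    import org.apache.lucene.codecs.FieldsProducer;
    import org.apache.lucene.codecs.StoredFieldsReader;
    import org.apache.lucene.codecs.TermVectorsReader;

    final class SegmentIntegritySketch {
      private final StoredFieldsReader storedFields;
      private final TermVectorsReader vectors;     // may be null
      private final FieldsProducer postings;       // terms dict + postings
      private final DocValuesProducer docValues;   // may be null

      SegmentIntegritySketch(StoredFieldsReader sf, TermVectorsReader tv,
                             FieldsProducer p, DocValuesProducer dv) {
        this.storedFields = sf; this.vectors = tv; this.postings = p; this.docValues = dv;
      }

      void checkIntegrity() throws IOException {
        storedFields.checkIntegrity();
        if (vectors != null)   vectors.checkIntegrity();
        if (postings != null)  postings.checkIntegrity();
        if (docValues != null) docValues.checkIntegrity();
      }
    }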

        + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; + /** Create a clone that one caller at a time may use to * read term vectors. */ @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java index 2367d9e9c83..4f579557012 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexWriter.java @@ -21,6 +21,7 @@ import java.io.Closeable; import java.io.IOException; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.packed.PackedInts; @@ -52,6 +53,7 @@ import org.apache.lucene.util.packed.PackedInts; *

  *   • AvgChunkSize --> the average size of a chunk of compressed documents, as a {@link DataOutput#writeVLong VLong}
  *   • BitsPerStartPointerDelta --> number of bits required to represent a delta from the average using ZigZag encoding
  *   • StartPointerDeltas --> {@link PackedInts packed} array of BlockChunks elements of BitsPerStartPointerDelta bits each, representing the deltas from the average start pointer using ZigZag encoding
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes
  *
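The matching reader change (next hunks) swaps openInput for openChecksumInput so a running CRC-32 is maintained while the index is loaded; checkFooter can then compare it against the stored value without a second pass over the file. A plain-java sketch of that idea, independent of Lucene's store API (payloadLength stands in for fileLength - footerLength()):

    import java.io.DataInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.util.zip.CRC32;
    import java.util.zip.CheckedInputStream;

    final class ChecksumOnLoadSketch {
      static void readAndVerify(InputStream raw, int payloadLength) throws IOException {
        CheckedInputStream checked = new CheckedInputStream(raw, new CRC32());
        DataInputStream in = new DataInputStream(checked);

        byte[] payload = new byte[payloadLength];
        in.readFully(payload);            // CRC accumulates over the payload...
        int magic = in.readInt();         // ...and over the footer magic
        int algorithmId = in.readInt();   // ...and the algorithm id
        long expected = checked.getChecksum().getValue();
        long stored = in.readLong();      // value the writer recorded last
        if (magic != ~0x3fd76c17 || algorithmId != 0 || stored != expected) {
          throw new IOException("corrupt footer or checksum mismatch");
        }
      }
    }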
        @@ -198,6 +200,7 @@ public final class CompressingStoredFieldsIndexWriter implements Closeable { writeBlock(); } fieldsIndexOut.writeVInt(0); // end marker + CodecUtil.writeFooter(fieldsIndexOut); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index 24fa291d4cd..65c99bbed6f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS; import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK; import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS; +import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CHECKSUM; import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT; import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START; import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELDS_EXTENSION; @@ -35,6 +36,7 @@ import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.FIELD import java.io.EOFException; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import org.apache.lucene.codecs.CodecUtil; @@ -47,6 +49,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; @@ -113,17 +116,20 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { boolean success = false; fieldInfos = fn; numDocs = si.getDocCount(); - IndexInput indexStream = null; + ChecksumIndexInput indexStream = null; try { // Load the index into memory final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION); - indexStream = d.openInput(indexStreamFN, context); + indexStream = d.openChecksumInput(indexStreamFN, context); final String codecNameIdx = formatName + CODEC_SFX_IDX; version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); - if (indexStream.getFilePointer() != indexStream.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")"); + + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(indexStream); + } else { + CodecUtil.checkEOF(indexStream); } indexStream.close(); indexStream = null; @@ -187,7 +193,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { length = in.readVInt(); data = new byte[length]; in.readBytes(data, 0, length); - visitor.stringField(info, new String(data, 
IOUtils.CHARSET_UTF_8)); + visitor.stringField(info, new String(data, StandardCharsets.UTF_8)); break; case NUMERIC_INT: visitor.intField(info, in.readInt()); @@ -509,4 +515,11 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { return indexReader.ramBytesUsed(); } + @Override + public void checkIntegrity() throws IOException { + if (version >= VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(fieldsStream); + } + } + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java index 35f829daa26..c0e53b598c1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java @@ -71,7 +71,8 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { static final String CODEC_SFX_DAT = "Data"; static final int VERSION_START = 0; static final int VERSION_BIG_CHUNKS = 1; - static final int VERSION_CURRENT = VERSION_BIG_CHUNKS; + static final int VERSION_CHECKSUM = 2; + static final int VERSION_CURRENT = VERSION_CHECKSUM; private final Directory directory; private final String segment; @@ -106,9 +107,11 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { this.numBufferedDocs = 0; boolean success = false; - IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), context); + IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION), + context); try { - fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context); + fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), + context); final String codecNameIdx = formatName + CODEC_SFX_IDX; final String codecNameDat = formatName + CODEC_SFX_DAT; @@ -314,6 +317,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter { throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs); } indexWriter.finish(numDocs); + CodecUtil.writeFooter(fieldsStream); assert bufferedDocs.length == 0; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index a98cb83347a..f5356387ed5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -28,6 +28,7 @@ import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter. 
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION; import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT; import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START; +import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHECKSUM; import java.io.Closeable; import java.io.IOException; @@ -48,6 +49,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -69,6 +71,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem private final FieldInfos fieldInfos; final CompressingStoredFieldsIndexReader indexReader; final IndexInput vectorsStream; + private final int version; private final int packedIntsVersion; private final CompressionMode compressionMode; private final Decompressor decompressor; @@ -88,6 +91,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem this.chunkSize = reader.chunkSize; this.numDocs = reader.numDocs; this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0); + this.version = reader.version; this.closed = false; } @@ -99,17 +103,20 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem boolean success = false; fieldInfos = fn; numDocs = si.getDocCount(); - IndexInput indexStream = null; + ChecksumIndexInput indexStream = null; try { // Load the index into memory final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION); - indexStream = d.openInput(indexStreamFN, context); + indexStream = d.openChecksumInput(indexStreamFN, context); final String codecNameIdx = formatName + CODEC_SFX_IDX; - int version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); + version = CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT); assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer(); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); - if (indexStream.getFilePointer() != indexStream.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexStreamFN + "\": read " + indexStream.getFilePointer() + " vs size " + indexStream.length() + " (resource: " + indexStream + ")"); + + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(indexStream); + } else { + CodecUtil.checkEOF(indexStream); } indexStream.close(); indexStream = null; @@ -1045,5 +1052,12 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem public long ramBytesUsed() { return indexReader.ramBytesUsed(); } + + @Override + public void checkIntegrity() throws IOException { + if (version >= VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(vectorsStream); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java index 9b05c14a34e..176159dc751 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java @@ -66,7 +66,8 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { static final String CODEC_SFX_DAT = "Data"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_CHECKSUM = 1; + static final int VERSION_CURRENT = VERSION_CHECKSUM; static final int BLOCK_SIZE = 64; @@ -220,9 +221,11 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { lastTerm = new BytesRef(ArrayUtil.oversize(30, 1)); boolean success = false; - IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context); + IndexOutput indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), + context); try { - vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context); + vectorsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), + context); final String codecNameIdx = formatName + CODEC_SFX_IDX; final String codecNameDat = formatName + CODEC_SFX_DAT; @@ -659,6 +662,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter { throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs); } indexWriter.finish(numDocs); + CodecUtil.writeFooter(vectorsStream); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java index 2de7a2be3ad..2377c6fb681 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.util.Arrays; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -198,9 +200,12 @@ final class BitVector implements Cloneable, MutableBits { // Changed DGaps to encode gaps between cleared bits, not // set: public final static int VERSION_DGAPS_CLEARED = 1; + + // added checksum + public final static int VERSION_CHECKSUM = 2; // Increment version to change it: - public final static int VERSION_CURRENT = VERSION_DGAPS_CLEARED; + public final static int VERSION_CURRENT = VERSION_CHECKSUM; public int getVersion() { return version; @@ -221,6 +226,7 @@ final class BitVector implements Cloneable, MutableBits { } else { writeBits(output); } + CodecUtil.writeFooter(output); assert verifyCount(); } finally { IOUtils.close(output); @@ -324,7 +330,7 @@ final class BitVector implements Cloneable, MutableBits { d, as written by the {@link #write} method. 
*/ public BitVector(Directory d, String name, IOContext context) throws IOException { - IndexInput input = d.openInput(name, context); + ChecksumIndexInput input = d.openChecksumInput(name, context); try { final int firstInt = input.readInt(); @@ -334,8 +340,8 @@ final class BitVector implements Cloneable, MutableBits { version = CodecUtil.checkHeader(input, CODEC, VERSION_START, VERSION_CURRENT); size = input.readInt(); } else { - version = VERSION_PRE; - size = firstInt; + // we started writing full header well before 4.0 + throw new IndexFormatTooOldException(input.toString(), Integer.toString(firstInt)); } if (size == -1) { if (version >= VERSION_DGAPS_CLEARED) { @@ -351,6 +357,11 @@ final class BitVector implements Cloneable, MutableBits { invertAll(); } + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(input); + } else { + CodecUtil.checkEOF(input); + } assert verifyCount(); } finally { input.close(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java index a5b545266c5..52fbd338ae2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java @@ -105,9 +105,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer { default: throw new AssertionError(); } - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + CodecUtil.checkEOF(input); success = true; } finally { if (success) { @@ -327,9 +325,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer { PagedBytes bytes = new PagedBytes(16); bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount()); final PagedBytes.Reader bytesReader = bytes.freeze(true); - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + CodecUtil.checkEOF(input); success = true; ramBytesUsed.addAndGet(bytes.ramBytesUsed()); return new BinaryDocValues() { @@ -367,12 +363,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer { bytes.copy(data, totalBytes); final PagedBytes.Reader bytesReader = bytes.freeze(true); final PackedInts.Reader reader = PackedInts.getReader(index); - if (data.getFilePointer() != data.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); - } - if (index.getFilePointer() != index.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); - } + CodecUtil.checkEOF(data); + CodecUtil.checkEOF(index); success = true; ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); return new BinaryDocValues() { @@ -414,12 +406,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer { bytes.copy(data, fixedLength * (long) valueCount); final PagedBytes.Reader bytesReader = bytes.freeze(true); final PackedInts.Reader reader = PackedInts.getReader(index); - if (data.getFilePointer() != 
data.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); - } - if (index.getFilePointer() != index.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); - } + CodecUtil.checkEOF(data); + CodecUtil.checkEOF(index); ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); success = true; return new BinaryDocValues() { @@ -459,12 +447,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer { bytes.copy(data, totalBytes); final PagedBytes.Reader bytesReader = bytes.freeze(true); final PackedInts.Reader reader = PackedInts.getReader(index); - if (data.getFilePointer() != data.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); - } - if (index.getFilePointer() != index.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); - } + CodecUtil.checkEOF(data); + CodecUtil.checkEOF(index); ramBytesUsed.addAndGet(bytes.ramBytesUsed() + reader.ramBytesUsed()); success = true; return new BinaryDocValues() { @@ -515,12 +499,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer { default: throw new AssertionError(); } - if (data.getFilePointer() != data.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")"); - } - if (index.getFilePointer() != index.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")"); - } + CodecUtil.checkEOF(data); + CodecUtil.checkEOF(index); success = true; } finally { if (success) { @@ -654,4 +634,8 @@ final class Lucene40DocValuesReader extends DocValuesProducer { public long ramBytesUsed() { return ramBytesUsed.get(); } + + @Override + public void checkIntegrity() throws IOException { + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java index a915e00622b..20536058f1e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java @@ -107,9 +107,7 @@ class Lucene40FieldInfosReader extends FieldInfosReader { omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, Collections.unmodifiableMap(attributes)); } - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + CodecUtil.checkEOF(input); FieldInfos fieldInfos = new FieldInfos(infos); success = true; return fieldInfos; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index e73a7a2a9cc..aefed2ddbca 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -1168,4 +1168,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { return 0; } + @Override + public void checkIntegrity() throws IOException {} + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java index 7427f80ed76..188af99bbbe 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java @@ -64,9 +64,7 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader { input.readStringStringMap(); // read deprecated attributes final Set files = input.readStringSet(); - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + CodecUtil.checkEOF(input); final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); si.setFiles(files); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index 651bbde52fd..47dc8e15e41 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -34,6 +34,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; import java.io.Closeable; +import java.nio.charset.StandardCharsets; import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*; @@ -193,7 +194,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme if ((bits & FIELD_IS_BINARY) != 0) { visitor.binaryField(info, bytes); } else { - visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8)); + visitor.stringField(info, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8)); } } } @@ -249,4 +250,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme public long ramBytesUsed() { return 0; } + + @Override + public void checkIntegrity() throws IOException {} } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index fc830ce869b..040b844f5de 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -760,5 +760,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos public long ramBytesUsed() { return 0; } + + @Override + public void checkIntegrity() throws IOException {} } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java index fd3f5175cc1..5633f0e3f2b 100644 --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java @@ -132,6 +132,7 @@ import org.apache.lucene.util.packed.PackedInts; *
  *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
  *   • PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}
  *   • DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link DataOutput#writeVLong VLong}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
@@ -190,7 +191,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   each packed or VInt block, when the length of document list is larger than packed block size.
  *
- *   • docFile(.doc) --> Header, <TermFreqs, SkipData?>TermCount
+ *   • docFile(.doc) --> Header, <TermFreqs, SkipData?>TermCount, Footer
  *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
  *   • TermFreqs --> <PackedBlock> PackedDocBlockNum, VIntBlock?
@@ -206,6 +207,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   • --> {@link DataOutput#writeVInt VInt}
  *   • SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
@@ -273,7 +275,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   The .pos file contains the lists of positions that each term occurs at within documents. It also
  *   sometimes stores part of payloads and offsets for speedup.
  *
- *   • PosFile(.pos) --> Header, <TermPositions> TermCount
+ *   • PosFile(.pos) --> Header, <TermPositions> TermCount, Footer
  *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
  *   • TermPositions --> <PackedPosDeltaBlock> PackedPosBlockNum, VIntBlock?
@@ -283,6 +285,7 @@ import org.apache.lucene.util.packed.PackedInts;
  *   • PositionDelta, OffsetDelta, OffsetLength --> {@link DataOutput#writeVInt VInt}
  *   • PayloadData --> {@link DataOutput#writeByte byte}PayLength
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
@@ -325,13 +328,14 @@ import org.apache.lucene.util.packed.PackedInts;
  *   The .pay file will store payloads and offsets associated with certain term-document positions.
  *   Some payloads and offsets will be separated out into .pos file, for performance reasons.
  *
- *   • PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> TermCount
+ *   • PayFile(.pay): --> Header, <TermPayloads, TermOffsets?> TermCount, Footer
  *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
  *   • TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData> PackedPayBlockNum
  *   • TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock> PackedPayBlockNum
  *   • PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock --> {@link PackedInts PackedInts}
  *   • SumPayLength --> {@link DataOutput#writeVInt VInt}
  *   • PayData --> {@link DataOutput#writeByte byte}SumPayLength
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  * Notes:
  *
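On the writer side (the Lucene41PostingsWriter hunk that follows), close() now has to seal each of the .doc/.pos/.pay streams it actually opened with a footer, and still close everything if writing one of those footers fails. A condensed sketch of that pattern, reusing the Lucene classes shown in this patch (the class name is illustrative):

    import java.io.IOException;
    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.IOUtils;

    final class MultiStreamCloseSketch {
      private IndexOutput docOut, posOut, payOut; // posOut/payOut may be null

      public void close() throws IOException {
        boolean success = false;
        try {
          if (docOut != null) CodecUtil.writeFooter(docOut);
          if (posOut != null) CodecUtil.writeFooter(posOut);
          if (payOut != null) CodecUtil.writeFooter(payOut);
          success = true;
        } finally {
          if (success) {
            IOUtils.close(docOut, posOut, payOut);            // let close errors propagate
          } else {
            IOUtils.closeWhileHandlingException(docOut, posOut, payOut); // keep the original cause
          }
          docOut = posOut = payOut = null;                    // guard against double close
        }
      }
    }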
              diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java index 9ae2265617e..25dc1bdecbe 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java @@ -35,7 +35,6 @@ import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -1547,4 +1546,18 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { return 0; } + @Override + public void checkIntegrity() throws IOException { + if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) { + if (docIn != null) { + CodecUtil.checksumEntireFile(docIn); + } + if (posIn != null) { + CodecUtil.checksumEntireFile(posIn); + } + if (payIn != null) { + CodecUtil.checksumEntireFile(payIn); + } + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java index 208af3c122c..2beea99a9db 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java @@ -64,11 +64,12 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase { // Increment version to change it final static int VERSION_START = 0; final static int VERSION_META_ARRAY = 1; - final static int VERSION_CURRENT = VERSION_META_ARRAY; + final static int VERSION_CHECKSUM = 2; + final static int VERSION_CURRENT = VERSION_CHECKSUM; - final IndexOutput docOut; - final IndexOutput posOut; - final IndexOutput payOut; + IndexOutput docOut; + IndexOutput posOut; + IndexOutput payOut; final static IntBlockTermState emptyState = new IntBlockTermState(); IntBlockTermState lastState; @@ -113,7 +114,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase { super(); docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), - state.context); + state.context); IndexOutput posOut = null; IndexOutput payOut = null; boolean success = false; @@ -123,7 +124,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase { if (state.fieldInfos.hasProx()) { posDeltaBuffer = new int[MAX_DATA_SIZE]; posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), - state.context); + state.context); CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); if (state.fieldInfos.hasPayloads()) { @@ -144,7 +145,7 @@ public final class Lucene41PostingsWriter extends PushPostingsWriterBase { if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), - state.context); + state.context); CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT); } } else { @@ -569,6 +570,26 @@ public final class Lucene41PostingsWriter 
extends PushPostingsWriterBase { @Override public void close() throws IOException { - IOUtils.close(docOut, posOut, payOut); + // TODO: add a finish() at least to PushBase? DV too...? + boolean success = false; + try { + if (docOut != null) { + CodecUtil.writeFooter(docOut); + } + if (posOut != null) { + CodecUtil.writeFooter(posOut); + } + if (payOut != null) { + CodecUtil.writeFooter(payOut); + } + success = true; + } finally { + if (success) { + IOUtils.close(docOut, posOut, payOut); + } else { + IOUtils.closeWhileHandlingException(docOut, posOut, payOut); + } + docOut = posOut = payOut = null; + } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java index 00f18606a49..02e6001fd4b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java @@ -68,7 +68,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter; *

  *   The DocValues metadata or .dvm file.
  *
  *   For DocValues field, this stores metadata, such as the offset into the
  *   DocValues data (.dvd)
  *
- *   DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry>NumFields
+ *   DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry>NumFields,Footer
  *
  *   • Entry --> NumericEntry | BinaryEntry | SortedEntry
  *   • NumericEntry --> DataOffset,CompressionType,PackedVersion
@@ -78,6 +78,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *   • DataOffset,DataLength --> {@link DataOutput#writeLong Int64}
  *   • EntryType,CompressionType --> {@link DataOutput#writeByte Byte}
  *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  *   Sorted fields have two entries: a SortedEntry with the FST metadata,
  *   and an ordinary NumericEntry for the document-to-ord metadata.
@@ -105,7 +106,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *   The DocValues data or .dvd file.
  *
  *   For DocValues field, this stores the actual per-document data (the heavy-lifting)
  *
- *   DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData>NumFields
+ *   DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData>NumFields,Footer
  *
  *   • NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics
  *   • BinaryData --> {@link DataOutput#writeByte Byte}DataLength,Addresses
@@ -114,6 +115,7 @@ import org.apache.lucene.util.packed.BlockPackedWriter;
  *   • TableCompressedNumerics --> TableSize,{@link DataOutput#writeLong Int64}TableSize,{@link PackedInts PackedInts}
  *   • UncompressedNumerics --> {@link DataOutput#writeByte Byte}maxdoc
  *   • Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=4096)}
+ *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
  *
  *   SortedSet entries store the list of ordinals in their BinaryData as a
  *   sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.
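The producer diff that follows opens the small .dvm metadata file through openChecksumInput, reads all the entries through it, and only then validates: the new footer for checksum-aware segments, or the old everything-was-consumed check for segments written before this change. A schematic of that open sequence (class name and constants are illustrative; error handling is trimmed):

    import java.io.IOException;
    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    final class MetadataOpenSketch {
      static final int VERSION_START = 0;
      static final int VERSION_CHECKSUM = 2;

      static int openAndVerify(Directory dir, String fileName, String codec) throws IOException {
        ChecksumIndexInput in = dir.openChecksumInput(fileName, IOContext.READONCE);
        try {
          int version = CodecUtil.checkHeader(in, codec, VERSION_START, VERSION_CHECKSUM);
          // ... read the per-field entries here, through "in", so the CRC keeps accumulating ...
          if (version >= VERSION_CHECKSUM) {
            CodecUtil.checkFooter(in);   // new segments: validate magic, algorithm id, checksum
          } else {
            CodecUtil.checkEOF(in);      // old segments: at least insist nothing was left unread
          }
          return version;
        } finally {
          in.close();
        }
      }
    }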

              diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index c9f842c8559..afe956be636 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -64,6 +65,7 @@ class Lucene42DocValuesProducer extends DocValuesProducer { private final Map binaries; private final Map fsts; private final IndexInput data; + private final int version; // ram instances we have already loaded private final Map numericInstances = @@ -89,16 +91,16 @@ class Lucene42DocValuesProducer extends DocValuesProducer { static final int VERSION_START = 0; static final int VERSION_GCD_COMPRESSION = 1; - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; + static final int VERSION_CHECKSUM = 2; + static final int VERSION_CURRENT = VERSION_CHECKSUM; Lucene42DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { maxDoc = state.segmentInfo.getDocCount(); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. - IndexInput in = state.directory.openInput(metaName, state.context); + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); boolean success = false; ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass())); - final int version; try { version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, @@ -108,8 +110,10 @@ class Lucene42DocValuesProducer extends DocValuesProducer { fsts = new HashMap<>(); readFields(in, state.fieldInfos); - if (in.getFilePointer() != in.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); + if (version >= VERSION_CHECKSUM) { + CodecUtil.checkFooter(in); + } else { + CodecUtil.checkEOF(in); } success = true; @@ -199,6 +203,13 @@ class Lucene42DocValuesProducer extends DocValuesProducer { return ramBytesUsed.get(); } + @Override + public void checkIntegrity() throws IOException { + if (version >= VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(data); + } + } + private NumericDocValues loadNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.number); data.seek(entry.offset); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java index 16c07c1e64a..c18723d3987 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java @@ -92,9 +92,7 @@ final class Lucene42FieldInfosReader extends FieldInfosReader { omitNorms, storePayloads, indexOptions, docValuesType, normsType, 
Collections.unmodifiableMap(attributes)); } - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); - } + CodecUtil.checkEOF(input); FieldInfos fieldInfos = new FieldInfos(infos); success = true; return fieldInfos; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java index 4c87bce59a9..c7afc622e43 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsConsumer.java @@ -34,14 +34,12 @@ import org.apache.lucene.util.packed.BlockPackedWriter; import org.apache.lucene.util.packed.PackedInts.FormatAndBits; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT; + /** * Writer for {@link Lucene42NormsFormat} */ -class Lucene42NormsConsumer extends DocValuesConsumer { - static final int VERSION_START = 0; - static final int VERSION_GCD_COMPRESSION = 1; - static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION; - +class Lucene42NormsConsumer extends DocValuesConsumer { static final byte NUMBER = 0; static final int BLOCK_SIZE = 4096; @@ -51,7 +49,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer { static final byte UNCOMPRESSED = 2; static final byte GCD_COMPRESSED = 3; - final IndexOutput data, meta; + IndexOutput data, meta; final int maxDoc; final float acceptableOverheadRatio; @@ -181,6 +179,10 @@ class Lucene42NormsConsumer extends DocValuesConsumer { try { if (meta != null) { meta.writeVInt(-1); // write EOF marker + CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { + CodecUtil.writeFooter(data); // write checksum } success = true; } finally { @@ -189,6 +191,7 @@ class Lucene42NormsConsumer extends DocValuesConsumer { } else { IOUtils.closeWhileHandlingException(data, meta); } + meta = data = null; } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java index be5dcb3f163..b6071dbac8a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42TermVectorsFormat.java @@ -59,7 +59,7 @@ import org.apache.lucene.util.packed.PackedInts; * {@link BlockPackedWriter blocks of packed ints} for positions.

              *

 * Here is a more detailed description of the field data file format:
 *
- *   VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk>ChunkCount
+ *   VectorData (.tvd) --> <Header>, PackedIntsVersion, ChunkSize, <Chunk>ChunkCount, Footer
 *   Header --> {@link CodecUtil#writeHeader CodecHeader}
 *   PackedIntsVersion --> {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}
 *   ChunkSize is the number of bytes of terms to accumulate before flushing, as a {@link DataOutput#writeVInt VInt}
@@ -107,14 +107,16 @@ import org.apache.lucene.util.packed.PackedInts;
 *   FieldTermsAndPayLoads --> Terms (Payloads)
 *   Terms: term bytes
 *   Payloads: payload bytes (if the field has payloads)
+ *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *
 * An index file (extension .tvx).
 *
- *   VectorIndex (.tvx) --> <Header>, <ChunkIndex>
+ *   VectorIndex (.tvx) --> <Header>, <ChunkIndex>, Footer
 *   Header --> {@link CodecUtil#writeHeader CodecHeader}
 *   ChunkIndex: See {@link CompressingStoredFieldsIndexWriter}
+ *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *
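Read-side verification has to stay backwards compatible, because pre-4.8 files have no footer, so the producers in this patch gate the footer check on the header version they just read. A sketch of that pattern with a made-up codec name and version constants; openChecksumInput, checkHeader, checkFooter and checkEOF are the calls the patch relies on:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.ChecksumIndexInput;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;

    // Hypothetical reader illustrating version-gated footer verification.
    class FooterCheckingReader {
      static final int VERSION_START = 0;
      static final int VERSION_CHECKSUM = 2;   // hypothetical: first version that writes a footer
      static final int VERSION_CURRENT = VERSION_CHECKSUM;

      void readMeta(Directory dir, String metaName) throws IOException {
        // a ChecksumIndexInput accumulates a CRC32 over everything read through it
        ChecksumIndexInput in = dir.openChecksumInput(metaName, IOContext.READONCE);
        try {
          int version = CodecUtil.checkHeader(in, "HypotheticalMeta", VERSION_START, VERSION_CURRENT);
          // ... read the actual metadata entries here ...
          if (version >= VERSION_CHECKSUM) {
            CodecUtil.checkFooter(in);   // compares the stored checksum against what was read
          } else {
            CodecUtil.checkEOF(in);      // old files: just ensure every byte was consumed
          }
        } finally {
          in.close();
        }
      }
    }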
            • * diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java index aa6cdb85634..d2c03914953 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java @@ -66,7 +66,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos * of indirection: docId -> ord. */ public static final int SORTED_SET_SINGLE_VALUED_SORTED = 1; - final IndexOutput data, meta; + IndexOutput data, meta; final int maxDoc; /** expert: Creates a new writer */ @@ -438,6 +438,10 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos try { if (meta != null) { meta.writeVInt(-1); // write EOF marker + CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { + CodecUtil.writeFooter(data); // write checksum } success = true; } finally { @@ -446,6 +450,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Clos } else { IOUtils.closeWhileHandlingException(data, meta); } + meta = data = null; } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java index 8004ac206a8..9d91867650f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java @@ -89,7 +89,7 @@ import org.apache.lucene.util.packed.PackedInts; *

 *   The DocValues metadata or .dvm file.
 *   For DocValues field, this stores metadata, such as the offset into the
 *   DocValues data (.dvd)
- *   DocValues metadata (.dvm) --> Header,<Entry>NumFields
+ *   DocValues metadata (.dvm) --> Header,<Entry>NumFields,Footer
 *     Entry --> NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry
 *     NumericEntry --> GCDNumericEntry | TableNumericEntry | DeltaNumericEntry
@@ -109,6 +109,7 @@ import org.apache.lucene.util.packed.PackedInts;
 *     Header --> {@link CodecUtil#writeHeader CodecHeader}
 *     MinValue,GCD,MissingOffset,AddressOffset,DataOffset --> {@link DataOutput#writeLong Int64}
 *     TableSize --> {@link DataOutput#writeVInt vInt}
+ *     Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *   Sorted fields have two entries: a BinaryEntry with the value metadata,
 *   and an ordinary NumericEntry for the document-to-ord metadata.
@@ -138,10 +139,13 @@ import org.apache.lucene.util.packed.PackedInts;
 *   is written for the addresses.
 *   MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
 *   If its -1, then there are no missing values.
+ *   Checksum contains the CRC32 checksum of all bytes in the .dvm file up
+ *   until the checksum. This is used to verify integrity of the file on opening the
+ *   index.
 *
 *   The DocValues data or .dvd file.
 *   For DocValues field, this stores the actual per-document data (the heavy-lifting)
- *   DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData>NumFields
+ *   DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData>NumFields,Footer
 *     NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | GCDCompressedNumerics
 *     BinaryData --> {@link DataOutput#writeByte Byte}DataLength,Addresses
@@ -150,6 +154,7 @@ import org.apache.lucene.util.packed.PackedInts;
 *     TableCompressedNumerics --> {@link PackedInts PackedInts}
 *     GCDCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}
 *     Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}
+ *     Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *   SortedSet entries store the list of ordinals in their BinaryData as a
 *   sequences of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.
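Only small metadata files are verified eagerly on open; large .dvd-style data files pay the cost through checkIntegrity(), which callers such as CheckIndex (or merging, when configured) trigger explicitly via CodecUtil.checksumEntireFile. A sketch of what such an implementation can look like, with a hypothetical version constant:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical producer that verifies its data file on demand.
    class IntegrityCheckingProducer {
      static final int VERSION_CHECKSUM = 2;  // hypothetical footer-introducing version

      final IndexInput data;   // assumed opened in the constructor
      final int version;       // header version captured when the metadata was read

      IntegrityCheckingProducer(IndexInput data, int version) {
        this.data = data;
        this.version = version;
      }

      public void checkIntegrity() throws IOException {
        if (version >= VERSION_CHECKSUM) {
          // streams the entire file and compares the computed CRC32 with the footer
          CodecUtil.checksumEntireFile(data);
        }
        // pre-checksum files cannot be verified this way, so nothing is done for them
      }
    }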

              @@ -179,7 +184,8 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat { static final String META_EXTENSION = "dvm"; static final int VERSION_START = 0; static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1; - static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED; + static final int VERSION_CHECKSUM = 2; + static final int VERSION_CURRENT = VERSION_CHECKSUM; static final byte NUMERIC = 0; static final byte BINARY = 1; static final byte SORTED = 2; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java index b84c6ecc8eb..8baccefa6cf 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java @@ -50,6 +50,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -80,7 +81,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. - IndexInput in = state.directory.openInput(metaName, state.context); + ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context); this.maxDoc = state.segmentInfo.getDocCount(); boolean success = false; try { @@ -94,8 +95,10 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos sortedSets = new HashMap<>(); readFields(in, state.fieldInfos); - if (in.getFilePointer() != in.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + metaName + "\": read " + in.getFilePointer() + " vs size " + in.length() + " (resource: " + in + ")"); + if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) { + CodecUtil.checkFooter(in); + } else { + CodecUtil.checkEOF(in); } success = true; @@ -299,6 +302,13 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos return ramBytesUsed.get(); } + @Override + public void checkIntegrity() throws IOException { + if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) { + CodecUtil.checksumEntireFile(data); + } + } + LongValues getNumeric(NumericEntry entry) throws IOException { final IndexInput data = this.data.clone(); data.seek(entry.offset); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java index 31101166cf8..82b98cd349f 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosFormat.java @@ -32,7 +32,7 @@ import org.apache.lucene.store.DataOutput; *

              *

 * Field names are stored in the field info file, with suffix .fnm.
 * FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
- * FieldBits,DocValuesBits,DocValuesGen,Attributes> FieldsCount
+ * FieldBits,DocValuesBits,DocValuesGen,Attributes> FieldsCount,Footer
 *
 * Data types:
 *   Header --> {@link CodecUtil#checkHeader CodecHeader}
@@ -42,6 +42,7 @@ import org.apache.lucene.store.DataOutput;
 *   FieldNumber --> {@link DataOutput#writeInt VInt}
 *   Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}
 *   DocValuesGen --> {@link DataOutput#writeLong(long) Int64}
+ *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *
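For a small write-once file like .fnm or .si the whole lifecycle fits in a single write method: header, payload, footer, then the usual success/failure close. A sketch under those assumptions; the file name, codec name and payload below are placeholders, not the real field-infos format:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.IOUtils;

    // Hypothetical writer for a small metadata file, ending with a checksum footer.
    class SmallMetadataWriter {
      void write(Directory dir, String fileName) throws IOException {
        IndexOutput output = dir.createOutput(fileName, IOContext.DEFAULT);
        boolean success = false;
        try {
          CodecUtil.writeHeader(output, "HypotheticalCodec", 1);  // codec name + version
          output.writeVInt(0);                                    // placeholder payload
          CodecUtil.writeFooter(output);                          // appends the checksum footer
          success = true;
        } finally {
          if (success) {
            IOUtils.close(output);
          } else {
            IOUtils.closeWhileHandlingException(output);
          }
        }
      }
    }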

              * Field Descriptions: @@ -113,7 +114,8 @@ public final class Lucene46FieldInfosFormat extends FieldInfosFormat { // Codec header static final String CODEC_NAME = "Lucene46FieldInfos"; static final int FORMAT_START = 0; - static final int FORMAT_CURRENT = FORMAT_START; + static final int FORMAT_CHECKSUM = 1; + static final int FORMAT_CURRENT = FORMAT_CHECKSUM; // Field flags static final byte IS_INDEXED = 0x1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java index ee9a7dc2071..0b24eaf0eb6 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosReader.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -49,13 +50,13 @@ final class Lucene46FieldInfosReader extends FieldInfosReader { @Override public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION); - IndexInput input = directory.openInput(fileName, context); + ChecksumIndexInput input = directory.openChecksumInput(fileName, context); boolean success = false; try { - CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, - Lucene46FieldInfosFormat.FORMAT_START, - Lucene46FieldInfosFormat.FORMAT_CURRENT); + int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, + Lucene46FieldInfosFormat.FORMAT_START, + Lucene46FieldInfosFormat.FORMAT_CURRENT); final int size = input.readVInt(); //read in the size FieldInfo infos[] = new FieldInfo[size]; @@ -91,9 +92,11 @@ final class Lucene46FieldInfosReader extends FieldInfosReader { omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); infos[i].setDocValuesGen(dvGen); } - - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + + if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) { + CodecUtil.checkFooter(input); + } else { + CodecUtil.checkEOF(input); } FieldInfos fieldInfos = new FieldInfos(infos); success = true; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java index e7b051808ba..5aed57b5f54 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46FieldInfosWriter.java @@ -26,9 +26,9 @@ import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; /** @@ -81,6 +81,7 @@ final class Lucene46FieldInfosWriter extends FieldInfosWriter { output.writeLong(fi.getDocValuesGen()); output.writeStringStringMap(fi.attributes()); } + CodecUtil.writeFooter(output); success = true; } finally { if (success) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java index b73e2e98459..090b216523c 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java @@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs *

              * Files: *

                - *
- *   .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files
+ *   .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
 *
 * Data types:
@@ -43,6 +43,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   Files --> {@link DataOutput#writeStringSet Set<String>}
 *   Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}
 *   IsCompoundFile --> {@link DataOutput#writeByte Int8}
+ *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *
 * Field Descriptions:
@@ -53,9 +54,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
 *   IsCompoundFile records whether the segment is written as a compound file or
 *   not. If this is -1, the segment is not a compound file. If it is 1, the segment
 *   is a compound file.
- *   Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- *   until the checksum. This is used to verify integrity of the file on opening the
- *   index.
 *
          • The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, * for each segment it creates. It includes metadata like the current Lucene * version, OS, Java version, why the segment was created (merge, flush, @@ -89,5 +87,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat { public final static String SI_EXTENSION = "si"; static final String CODEC_NAME = "Lucene46SegmentInfo"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_CHECKSUM = 1; + static final int VERSION_CURRENT = VERSION_CHECKSUM; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java index e733be343ea..6a1cb6b7a6b 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java @@ -26,9 +26,9 @@ import org.apache.lucene.codecs.SegmentInfoReader; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; /** @@ -46,12 +46,12 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader { @Override public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION); - final IndexInput input = dir.openInput(fileName, context); + final ChecksumIndexInput input = dir.openChecksumInput(fileName, context); boolean success = false; try { - CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME, - Lucene46SegmentInfoFormat.VERSION_START, - Lucene46SegmentInfoFormat.VERSION_CURRENT); + int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME, + Lucene46SegmentInfoFormat.VERSION_START, + Lucene46SegmentInfoFormat.VERSION_CURRENT); final String version = input.readString(); final int docCount = input.readInt(); if (docCount < 0) { @@ -61,8 +61,10 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader { final Map diagnostics = input.readStringStringMap(); final Set files = input.readStringSet(); - if (input.getFilePointer() != input.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) { + CodecUtil.checkFooter(input); + } else { + CodecUtil.checkEOF(input); } final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java index 10df630154a..af03f40e2bd 100755 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java @@ -59,7 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter { 
     output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
     output.writeStringStringMap(si.getDiagnostics());
     output.writeStringSet(si.files());
-
+    CodecUtil.writeFooter(output);
     success = true;
   } finally {
     if (!success) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html
index 712b12c60af..f3cb05c9223 100755
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/package.html
@@ -383,6 +383,9 @@ on multi-valued fields.
 <li>In version 4.5, DocValues were extended to explicitly represent missing values.
 <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
 allow updating NumericDocValues fields.
+<li>In version 4.8, checksum footers were added to the end of each index file
+for improved data integrity. Specifically, the last 8 bytes of every index file
+contain the zlib-crc32 checksum of the file.
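From application code the new footers are mostly invisible; what changes is that corruption can now be detected before it spreads. One way to opt in to stronger checking is the IndexWriterConfig option added by this patch, shown in the sketch below; the directory path, analyzer and version constant are just examples, and StandardAnalyzer comes from the separate analyzers-common module:

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class CheckedMergeExample {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("/tmp/index"));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48,
            new StandardAnalyzer(Version.LUCENE_48));
        // off by default (DEFAULT_CHECK_INTEGRITY_AT_MERGE = false); enable for extra safety
        iwc.setCheckIntegrityAtMerge(true);
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
          // ... add or update documents; each merge now calls checkIntegrity() on the
          // segments being merged, failing fast on a checksum mismatch. CheckIndex
          // performs the same per-segment integrity pass when run offline.
          writer.forceMerge(1);
        } finally {
          writer.close();
          dir.close();
        }
      }
    }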

          Limitations

          diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index cf960ac63d4..a2e0ad6b5d9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -310,6 +310,13 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { } return size; } + + @Override + public void checkIntegrity() throws IOException { + for (DocValuesProducer format : formats.values()) { + format.checkIntegrity(); + } + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java index 0091a9ed1a3..8ac67547e2d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java @@ -246,6 +246,13 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException { + for (FieldsProducer producer : formats.values()) { + producer.checkIntegrity(); + } + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index a3e28e85c11..2e739f733c0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -238,4 +238,13 @@ public abstract class AtomicReader extends IndexReader { * synchronization. */ public abstract Bits getLiveDocs(); + + /** + * Checks consistency of this reader. + *

          + * Note that this may be costly in terms of I/O, e.g. + * may involve computing a checksum value against large data files. + * @lucene.internal + */ + public abstract void checkIntegrity() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index fc3951a5aab..eb21dccfced 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -536,6 +536,10 @@ public class CheckIndex { reader = new SegmentReader(info, IOContext.DEFAULT); segInfoStat.openReaderPassed = true; + + if (infoStream != null) + infoStream.print(" test: check integrity........."); + reader.checkIntegrity(); final int numDocs = reader.numDocs(); toLoseDocCount = numDocs; diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 9b3b214495c..36d2251e9c1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -423,4 +423,9 @@ public class FilterAtomicReader extends AtomicReader { return in.getDocsWithField(field); } + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + in.checkIntegrity(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 53a55ef1d4b..0c6ecb07413 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2085,7 +2085,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ rollbackInternal(); } } - assert assertEventQueueAfterClose(); } private void rollbackInternal() throws IOException { @@ -2180,11 +2179,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ } closed = true; closing = false; - try { - processEvents(false, true); - } finally { - notifyAll(); - } } } } @@ -2657,7 +2651,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ false, codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, - MergeState.CheckAbort.NONE, globalFieldNumberMap, context); + MergeState.CheckAbort.NONE, globalFieldNumberMap, + context, config.getCheckIntegrityAtMerge()); if (!merger.shouldMerge()) { return; @@ -4057,7 +4052,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit{ // OneMerge to return a view over the actual segments to merge final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), merge.info.info, infoStream, dirWrapper, - checkAbort, globalFieldNumberMap, context); + checkAbort, globalFieldNumberMap, + context, config.getCheckIntegrityAtMerge()); merge.checkAborted(directory); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index e94e421e750..eb295a7c50f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -110,6 +110,12 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl * (set to true). 
For batch indexing with very large * ram buffers use false */ public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true; + + /** Default value for calling {@link AtomicReader#checkIntegrity()} before + * merging segments (set to false). You can set this + * to true for additional safety. */ + public final static boolean DEFAULT_CHECK_INTEGRITY_AT_MERGE = false; + /** * Sets the default (for any instance) maximum time to wait for a write lock * (in milliseconds). diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 23e1cf28869..82f9979953e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -97,6 +97,9 @@ public class LiveIndexWriterConfig { /** True if segment flushes should use compound file format */ protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM; + + /** True if merging should check integrity of segments before merge */ + protected volatile boolean checkIntegrityAtMerge = IndexWriterConfig.DEFAULT_CHECK_INTEGRITY_AT_MERGE; // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) { @@ -152,6 +155,7 @@ public class LiveIndexWriterConfig { flushPolicy = config.getFlushPolicy(); perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB(); useCompoundFile = config.getUseCompoundFile(); + checkIntegrityAtMerge = config.getCheckIntegrityAtMerge(); } /** Returns the default analyzer to use for indexing documents. */ @@ -475,6 +479,26 @@ public class LiveIndexWriterConfig { return useCompoundFile ; } + /** + * Sets if {@link IndexWriter} should call {@link AtomicReader#checkIntegrity()} + * on existing segments before merging them into a new one. + *

          + * Use true to enable this safety check, which can help + * reduce the risk of propagating index corruption from older segments + * into new ones, at the expense of slower merging. + *

          + */ + public LiveIndexWriterConfig setCheckIntegrityAtMerge(boolean checkIntegrityAtMerge) { + this.checkIntegrityAtMerge = checkIntegrityAtMerge; + return this; + } + + /** Returns true if {@link AtomicReader#checkIntegrity()} is called before + * merging segments. */ + public boolean getCheckIntegrityAtMerge() { + return checkIntegrityAtMerge; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -499,6 +523,7 @@ public class LiveIndexWriterConfig { sb.append("readerPooling=").append(getReaderPooling()).append("\n"); sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n"); + sb.append("checkIntegrityAtMerge=").append(getCheckIntegrityAtMerge()).append("\n"); return sb.toString(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java index f753b3ed8f4..6b5bd667f9e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java @@ -299,4 +299,12 @@ public class ParallelAtomicReader extends AtomicReader { NumericDocValues values = reader == null ? null : reader.getNormValues(field); return values; } + + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + for (AtomicReader reader : completeReaderSet) { + reader.checkIntegrity(); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java index 4f09296af0f..edd1f561c40 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java @@ -95,14 +95,4 @@ final class SegmentDocValues { IOUtils.reThrow(t); } } - - /** Returns approximate RAM bytes used. */ - synchronized long ramBytesUsed() { - long ramBytesUsed = 0; - for (RefCount dvp : genDVProducers.values()) { - ramBytesUsed += dvp.get().ramBytesUsed(); - } - return ramBytesUsed; - } - } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 65709ed4267..82a9d756d9a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -36,11 +36,9 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.ChecksumIndexOutput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.NoSuchDirectoryException; import org.apache.lucene.util.IOUtils; @@ -69,10 +67,10 @@ import org.apache.lucene.util.IOUtils; *

          * Files: *

            - *
- *   segments.gen: GenHeader, Generation, Generation
+ *   segments.gen: GenHeader, Generation, Generation, Footer
 *   segments_N: Header, Version, NameCounter, SegCount,
 *     <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles>SegCount,
- *     CommitUserData, Checksum
+ *     CommitUserData, Footer
 *
 * Data types:
@@ -84,6 +82,7 @@ import org.apache.lucene.util.IOUtils;
 *   SegName, SegCodec --> {@link DataOutput#writeString String}
 *   CommitUserData --> {@link DataOutput#writeStringStringMap Map<String,String>}
 *   UpdatesFiles --> {@link DataOutput#writeStringSet(Set) Set<String>}
+ *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
 *
 * Field Descriptions:
@@ -98,9 +97,6 @@ import org.apache.lucene.util.IOUtils;
 *   there are no deletes. Anything above zero means there are deletes
 *   stored by {@link LiveDocsFormat}.
 *   DeletionCount records the number of deleted documents in this segment.
- *   Checksum contains the CRC32 checksum of all bytes in the segments_N file up
- *   until the checksum. This is used to verify integrity of the file on opening the
- *   index.
 *   SegCodec is the {@link Codec#getName() name} of the Codec that encoded
 *   this segment.
 *
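segments_N is a special case: pre-4.8 files already ended in a raw 8-byte CRC32 written by the old ChecksumIndexOutput, so the reader has to accept both layouts. A sketch of that dual verification; the version constant is a stand-in for the real SegmentInfos format number, while getChecksum, checkFooter and checkEOF are the APIs the patch uses:

    import java.io.IOException;

    import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.store.ChecksumIndexInput;

    // Hypothetical helper showing how a segments file tail can be verified
    // for both the new footer layout and the legacy trailing-long checksum.
    class LegacyOrFooterChecksum {
      static final int VERSION_48 = 3;  // hypothetical: first format with a codec footer

      static void verifyTail(ChecksumIndexInput input, int format) throws IOException {
        if (format >= VERSION_48) {
          CodecUtil.checkFooter(input);
        } else {
          long actual = input.getChecksum();   // CRC32 of everything read so far
          long expected = input.readLong();    // trailing checksum written by older versions
          if (expected != actual) {
            throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
          }
          CodecUtil.checkEOF(input);
        }
      }
    }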
      • CommitUserData stores an optional user-supplied opaque @@ -122,10 +118,17 @@ public final class SegmentInfos implements Cloneable, Iterable= VERSION_48) { + CodecUtil.checkFooter(input); + } else { + final long checksumNow = input.getChecksum(); + final long checksumThen = input.readLong(); + if (checksumNow != checksumThen) { + throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")"); + } + CodecUtil.checkEOF(input); } success = true; @@ -402,7 +411,7 @@ public final class SegmentInfos implements Cloneable, Iterable readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, - MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException { + MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException { + // validate incoming readers + if (validate) { + for (AtomicReader reader : readers) { + reader.checkIntegrity(); + } + } mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort); directory = dir; this.codec = segmentInfo.getCodec(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index 18a7e19a439..6f0f74d3605 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -33,10 +33,13 @@ import org.apache.lucene.util.IOUtils; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; /** * IndexReader implementation over a single segment. @@ -72,7 +75,8 @@ public final class SegmentReader extends AtomicReader { } }; - final Map dvProducers = new HashMap<>(); + final Map dvProducersByField = new HashMap<>(); + final Set dvProducers = Collections.newSetFromMap(new IdentityHashMap()); final FieldInfos fieldInfos; @@ -177,12 +181,15 @@ public final class SegmentReader extends AtomicReader { // System.out.println("[" + Thread.currentThread().getName() + "] SR.initDocValuesProducers: segInfo=" + si + "; gens=" + genInfos.keySet()); + // TODO: can we avoid iterating over fieldinfos several times and creating maps of all this stuff if dv updates do not exist? 
+ for (Entry> e : genInfos.entrySet()) { Long gen = e.getKey(); List infos = e.getValue(); DocValuesProducer dvp = segDocValues.getDocValuesProducer(gen, si, IOContext.READ, dir, dvFormat, infos); for (FieldInfo fi : infos) { - dvProducers.put(fi.name, dvp); + dvProducersByField.put(fi.name, dvp); + dvProducers.add(dvp); } } @@ -250,7 +257,7 @@ public final class SegmentReader extends AtomicReader { try { core.decRef(); } finally { - dvProducers.clear(); + dvProducersByField.clear(); try { IOUtils.close(docValuesLocal, docsWithFieldLocal); } finally { @@ -395,13 +402,12 @@ public final class SegmentReader extends AtomicReader { return null; } - DocValuesProducer dvProducer = dvProducers.get(field); - assert dvProducer != null; - Map dvFields = docValuesLocal.get(); NumericDocValues dvs = (NumericDocValues) dvFields.get(field); if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; dvs = dvProducer.getNumeric(fi); dvFields.put(field, dvs); } @@ -422,13 +428,12 @@ public final class SegmentReader extends AtomicReader { return null; } - DocValuesProducer dvProducer = dvProducers.get(field); - assert dvProducer != null; - Map dvFields = docsWithFieldLocal.get(); Bits dvs = dvFields.get(field); if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; dvs = dvProducer.getDocsWithField(fi); dvFields.put(field, dvs); } @@ -444,13 +449,12 @@ public final class SegmentReader extends AtomicReader { return null; } - DocValuesProducer dvProducer = dvProducers.get(field); - assert dvProducer != null; - Map dvFields = docValuesLocal.get(); BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field); if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; dvs = dvProducer.getBinary(fi); dvFields.put(field, dvs); } @@ -466,13 +470,12 @@ public final class SegmentReader extends AtomicReader { return null; } - DocValuesProducer dvProducer = dvProducers.get(field); - assert dvProducer != null; - Map dvFields = docValuesLocal.get(); SortedDocValues dvs = (SortedDocValues) dvFields.get(field); if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; dvs = dvProducer.getSorted(fi); dvFields.put(field, dvs); } @@ -488,13 +491,12 @@ public final class SegmentReader extends AtomicReader { return null; } - DocValuesProducer dvProducer = dvProducers.get(field); - assert dvProducer != null; - Map dvFields = docValuesLocal.get(); SortedSetDocValues dvs = (SortedSetDocValues) dvFields.get(field); if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; dvs = dvProducer.getSortedSet(fi); dvFields.put(field, dvs); } @@ -548,12 +550,45 @@ public final class SegmentReader extends AtomicReader { public long ramBytesUsed() { ensureOpen(); long ramBytesUsed = 0; - if (segDocValues != null) { - ramBytesUsed += segDocValues.ramBytesUsed(); + if (dvProducers != null) { + for (DocValuesProducer producer : dvProducers) { + ramBytesUsed += producer.ramBytesUsed(); + } } if (core != null) { ramBytesUsed += core.ramBytesUsed(); } return ramBytesUsed; } + + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + + // stored fields + getFieldsReader().checkIntegrity(); + + // term vectors + TermVectorsReader termVectorsReader = getTermVectorsReader(); + if (termVectorsReader != null) { + termVectorsReader.checkIntegrity(); + } + + // 
terms/postings + if (core.fields != null) { + core.fields.checkIntegrity(); + } + + // norms + if (core.normsProducer != null) { + core.normsProducer.checkIntegrity(); + } + + // docvalues + if (dvProducers != null) { + for (DocValuesProducer producer : dvProducers) { + producer.checkIntegrity(); + } + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java index dd56512b827..b127e119dce 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java @@ -239,4 +239,12 @@ public final class SlowCompositeReaderWrapper extends AtomicReader { // TODO: as this is a wrapper, should we really close the delegate? in.close(); } + + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + for (AtomicReaderContext ctx : in.leaves()) { + ctx.reader().checkIntegrity(); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/Term.java b/lucene/core/src/java/org/apache/lucene/index/Term.java index 30d89126d50..848fe3d4820 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Term.java +++ b/lucene/core/src/java/org/apache/lucene/index/Term.java @@ -21,9 +21,9 @@ import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; /** A Term represents a word from text. This is the unit of search. It is @@ -83,7 +83,7 @@ public final class Term implements Comparable { * the raw bytes will be printed instead. */ public static final String toString(BytesRef termText) { // the term might not be text, but usually is. so we make a best effort - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); try { diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/package.html b/lucene/core/src/java/org/apache/lucene/search/similarities/package.html index bc235e5c54e..4ea2b3184d5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/package.html +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/package.html @@ -120,7 +120,7 @@ subclassing the Similarity, one can simply introduce a new basic model and tell

        Changing {@linkplain org.apache.lucene.search.similarities.DefaultSimilarity}

        If you are interested in use cases for changing your similarity, see the Lucene users's mailing list at Overriding Similarity. + href="http://www.gossamer-threads.com/lists/lucene/java-user/39125">Overriding Similarity. In summary, here are a few use cases:

        1. The SweetSpotSimilarity in diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java new file mode 100644 index 00000000000..8b34d706bd2 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksum.java @@ -0,0 +1,84 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.zip.Checksum; + +/** + * Wraps another {@link Checksum} with an internal buffer + * to speed up checksum calculations. + */ +public class BufferedChecksum implements Checksum { + private final Checksum in; + private final byte buffer[]; + private int upto; + /** Default buffer size: 256 */ + public static final int DEFAULT_BUFFERSIZE = 256; + + /** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */ + public BufferedChecksum(Checksum in) { + this(in, DEFAULT_BUFFERSIZE); + } + + /** Create a new BufferedChecksum with the specified bufferSize */ + public BufferedChecksum(Checksum in, int bufferSize) { + this.in = in; + this.buffer = new byte[bufferSize]; + } + + @Override + public void update(int b) { + if (upto == buffer.length) { + flush(); + } + buffer[upto++] = (byte) b; + } + + @Override + public void update(byte[] b, int off, int len) { + if (len >= buffer.length) { + flush(); + in.update(b, off, len); + } else { + if (upto + len > buffer.length) { + flush(); + } + System.arraycopy(b, off, buffer, upto, len); + upto += len; + } + } + + @Override + public long getValue() { + flush(); + return in.getValue(); + } + + @Override + public void reset() { + upto = 0; + in.reset(); + } + + private void flush() { + if (upto > 0) { + in.update(buffer, 0, upto); + } + upto = 0; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java similarity index 63% rename from lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java rename to lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java index 212fc5ae9bc..d3fb088776e 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedChecksumIndexInput.java @@ -21,41 +21,40 @@ import java.io.IOException; import java.util.zip.CRC32; import java.util.zip.Checksum; -/** Writes bytes through to a primary IndexOutput, computing - * checksum. - * - * @lucene.internal +/** + * Simple implementation of {@link ChecksumIndexInput} that wraps + * another input and delegates calls. 
*/ -public class ChecksumIndexOutput extends IndexOutput { - IndexOutput main; - Checksum digest; +public class BufferedChecksumIndexInput extends ChecksumIndexInput { + final IndexInput main; + final Checksum digest; - public ChecksumIndexOutput(IndexOutput main) { + /** Creates a new BufferedChecksumIndexInput */ + public BufferedChecksumIndexInput(IndexInput main) { + super("BufferedChecksumIndexInput(" + main + ")"); this.main = main; - digest = new CRC32(); + this.digest = new BufferedChecksum(new CRC32()); } @Override - public void writeByte(byte b) throws IOException { + public byte readByte() throws IOException { + final byte b = main.readByte(); digest.update(b); - main.writeByte(b); + return b; } @Override - public void writeBytes(byte[] b, int offset, int length) throws IOException { - digest.update(b, offset, length); - main.writeBytes(b, offset, length); + public void readBytes(byte[] b, int offset, int len) + throws IOException { + main.readBytes(b, offset, len); + digest.update(b, offset, len); } + @Override public long getChecksum() { return digest.getValue(); } - @Override - public void flush() throws IOException { - main.flush(); - } - @Override public void close() throws IOException { main.close(); @@ -66,13 +65,8 @@ public class ChecksumIndexOutput extends IndexOutput { return main.getFilePointer(); } - /** writes the checksum */ - public void finishCommit() throws IOException { - main.writeLong(getChecksum()); - } - @Override - public long length() throws IOException { + public long length() { return main.length(); } } diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java index 8579a7dced3..91f72067edc 100644 --- a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java @@ -18,6 +18,7 @@ package org.apache.lucene.store; */ import java.io.IOException; +import java.util.zip.CRC32; /** Base implementation class for buffered {@link IndexOutput}. 
*/ public abstract class BufferedIndexOutput extends IndexOutput { @@ -28,6 +29,7 @@ public abstract class BufferedIndexOutput extends IndexOutput { private final byte[] buffer; private long bufferStart = 0; // position in file of buffer private int bufferPosition = 0; // position in buffer + private final CRC32 crc = new CRC32(); /** * Creates a new {@link BufferedIndexOutput} with the default buffer size @@ -75,6 +77,7 @@ public abstract class BufferedIndexOutput extends IndexOutput { if (bufferPosition > 0) flush(); // and write data at once + crc.update(b, offset, length); flushBuffer(b, offset, length); bufferStart += length; } else { @@ -99,6 +102,7 @@ public abstract class BufferedIndexOutput extends IndexOutput { @Override public void flush() throws IOException { + crc.update(buffer, 0, bufferPosition); flushBuffer(buffer, bufferPosition); bufferStart += bufferPosition; bufferPosition = 0; @@ -141,4 +145,9 @@ public abstract class BufferedIndexOutput extends IndexOutput { return bufferSize; } + @Override + public long getChecksum() throws IOException { + flush(); + return crc.getValue(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java index 13975783735..6c2cb8fe738 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java @@ -1,5 +1,7 @@ package org.apache.lucene.store; +import java.io.IOException; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,61 +19,24 @@ package org.apache.lucene.store; * limitations under the License. */ -import java.io.IOException; -import java.util.zip.CRC32; -import java.util.zip.Checksum; - -/** Reads bytes through to a primary IndexInput, computing - * checksum as it goes. Note that you cannot use seek(). - * - * @lucene.internal +/** + * Extension of IndexInput, computing checksum as it goes. + * Callers can retrieve the checksum via {@link #getChecksum()}. */ -public class ChecksumIndexInput extends IndexInput { - IndexInput main; - Checksum digest; - - public ChecksumIndexInput(IndexInput main) { - super("ChecksumIndexInput(" + main + ")"); - this.main = main; - digest = new CRC32(); - } - - @Override - public byte readByte() throws IOException { - final byte b = main.readByte(); - digest.update(b); - return b; - } - - @Override - public void readBytes(byte[] b, int offset, int len) - throws IOException { - main.readBytes(b, offset, len); - digest.update(b, offset, len); - } - +public abstract class ChecksumIndexInput extends IndexInput { - public long getChecksum() { - return digest.getValue(); + /** resourceDescription should be a non-null, opaque string + * describing this resource; it's returned from + * {@link #toString}. 
*/ + protected ChecksumIndexInput(String resourceDescription) { + super(resourceDescription); } - @Override - public void close() throws IOException { - main.close(); - } - - @Override - public long getFilePointer() { - return main.getFilePointer(); - } + /** Returns the current checksum value */ + public abstract long getChecksum() throws IOException; @Override public void seek(long pos) { throw new UnsupportedOperationException(); } - - @Override - public long length() { - return main.length(); - } } diff --git a/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java b/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java index 1532779b6c0..f1017e028f4 100644 --- a/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java @@ -52,14 +52,15 @@ import java.io.IOException; *
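BufferedChecksum only batches update() calls; the value it produces is identical to the wrapped CRC32, it is just cheaper when a reader feeds it one byte at a time. A small self-contained usage sketch (the payload bytes are arbitrary):

    import java.nio.charset.StandardCharsets;
    import java.util.zip.CRC32;
    import java.util.zip.Checksum;

    import org.apache.lucene.store.BufferedChecksum;

    public class BufferedChecksumDemo {
      public static void main(String[] args) {
        byte[] payload = "lucene checksum footer demo".getBytes(StandardCharsets.UTF_8);

        Checksum plain = new CRC32();
        plain.update(payload, 0, payload.length);

        // Same CRC32, but single-byte updates are buffered (256 bytes by default)
        // before being pushed to the underlying Checksum.
        Checksum buffered = new BufferedChecksum(new CRC32());
        for (byte b : payload) {
          buffered.update(b);
        }

        // both values are identical
        System.out.println(Long.toHexString(plain.getValue()));
        System.out.println(Long.toHexString(buffered.getValue()));
      }
    }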

 * <p>Description:</p>
 * <ul>
- *   <li>Compound (.cfs) --> Header, FileData FileCount</li>
+ *   <li>Compound (.cfs) --> Header, FileData FileCount, Footer</li>
 *   <li>Compound Entry Table (.cfe) --> Header, FileCount, <FileName,
 *       DataOffset, DataLength> FileCount</li>
 *   <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 *   <li>FileCount --> {@link DataOutput#writeVInt VInt}</li>
- *   <li>DataOffset,DataLength --> {@link DataOutput#writeLong UInt64}</li>
+ *   <li>DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64}</li>
 *   <li>FileName --> {@link DataOutput#writeString String}</li>
 *   <li>FileData --> raw file data</li>
+ *   <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
 * </ul>
 * <p>Notes:</p>
 * <ul>
        @@ -87,6 +88,7 @@ public final class CompoundFileDirectory extends BaseDirectory { private static final Map SENTINEL = Collections.emptyMap(); private final CompoundFileWriter writer; private final IndexInputSlicer handle; + private int version; /** * Create a new CompoundFileDirectory. @@ -120,15 +122,15 @@ public final class CompoundFileDirectory extends BaseDirectory { } /** Helper method that reads CFS entries from an input stream */ - private static final Map readEntries(Directory dir, String name) throws IOException { + private final Map readEntries(Directory dir, String name) throws IOException { IOException priorE = null; - IndexInput entriesStream = null; + ChecksumIndexInput entriesStream = null; try { final String entriesFileName = IndexFileNames.segmentFileName( IndexFileNames.stripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); - entriesStream = dir.openInput(entriesFileName, IOContext.READONCE); - CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_START); + entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE); + version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT); final int numEntries = entriesStream.readVInt(); final Map mapping = new HashMap<>(numEntries); for (int i = 0; i < numEntries; i++) { @@ -141,8 +143,10 @@ public final class CompoundFileDirectory extends BaseDirectory { fileEntry.offset = entriesStream.readLong(); fileEntry.length = entriesStream.readLong(); } - if (entriesStream.getFilePointer() != entriesStream.length()) { - throw new CorruptIndexException("did not read all bytes from file \"" + entriesFileName + "\": read " + entriesStream.getFilePointer() + " vs size " + entriesStream.length() + " (resource: " + entriesStream + ")"); + if (version >= CompoundFileWriter.VERSION_CHECKSUM) { + CodecUtil.checkFooter(entriesStream); + } else { + CodecUtil.checkEOF(entriesStream); } return mapping; } catch (IOException ioe) { diff --git a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java index aefe2a46d74..405bf973c90 100644 --- a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java +++ b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java @@ -54,7 +54,8 @@ final class CompoundFileWriter implements Closeable{ // versioning for the .cfs file static final String DATA_CODEC = "CompoundFileWriterData"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_CHECKSUM = 1; + static final int VERSION_CURRENT = VERSION_CHECKSUM; // versioning for the .cfe file static final String ENTRY_CODEC = "CompoundFileWriterEntries"; @@ -140,6 +141,7 @@ final class CompoundFileWriter implements Closeable{ // open the compound stream getOutput(); assert dataOut != null; + CodecUtil.writeFooter(dataOut); } catch (IOException e) { priorException = e; } finally { @@ -202,6 +204,7 @@ final class CompoundFileWriter implements Closeable{ entryOut.writeLong(fe.offset); entryOut.writeLong(fe.length); } + CodecUtil.writeFooter(entryOut); } IndexOutput createOutput(String name, IOContext context) throws IOException { @@ -342,6 +345,11 @@ final class CompoundFileWriter implements Closeable{ writtenBytes += length; delegate.writeBytes(b, offset, length); } + + @Override + public long getChecksum() 
throws IOException { + return delegate.getChecksum(); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java index 6bcf18ee523..adbe38c1ada 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java @@ -18,13 +18,12 @@ package org.apache.lucene.store; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; -import org.apache.lucene.util.IOUtils; - /** * Abstract base class for performing read operations of Lucene's low-level * data types. @@ -188,7 +187,7 @@ public abstract class DataInput implements Cloneable { int length = readVInt(); final byte[] bytes = new byte[length]; readBytes(bytes, 0, length); - return new String(bytes, 0, length, IOUtils.CHARSET_UTF_8); + return new String(bytes, 0, length, StandardCharsets.UTF_8); } /** Returns a clone of this stream. diff --git a/lucene/core/src/java/org/apache/lucene/store/Directory.java b/lucene/core/src/java/org/apache/lucene/store/Directory.java index 9af6ff04cbc..f45abb1ee3d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/Directory.java +++ b/lucene/core/src/java/org/apache/lucene/store/Directory.java @@ -100,7 +100,12 @@ public abstract class Directory implements Closeable { *

        Throws {@link FileNotFoundException} or {@link NoSuchFileException} * if the file does not exist. */ - public abstract IndexInput openInput(String name, IOContext context) throws IOException; + public abstract IndexInput openInput(String name, IOContext context) throws IOException; + + /** Returns a stream reading an existing file, computing checksum as it reads */ + public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException { + return new BufferedChecksumIndexInput(openInput(name, context)); + } /** Construct a {@link Lock}. * @param name the name of the lock file diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java b/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java index 5fec7b596ae..b9196a1d8f5 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java @@ -43,6 +43,8 @@ public abstract class IndexOutput extends DataOutput implements Closeable { */ public abstract long getFilePointer(); + /** Returns the current checksum of bytes written so far */ + public abstract long getChecksum() throws IOException; /** The number of bytes in the file. */ public abstract long length() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java b/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java index 67f39ef3971..1f0c74f78d7 100644 --- a/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java +++ b/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java @@ -18,6 +18,8 @@ package org.apache.lucene.store; */ import java.io.IOException; +import java.util.zip.CRC32; +import java.util.zip.Checksum; /** * A memory-resident {@link IndexOutput} implementation. @@ -35,6 +37,8 @@ public class RAMOutputStream extends IndexOutput { private int bufferPosition; private long bufferStart; private int bufferLength; + + private Checksum crc = new BufferedChecksum(new CRC32()); /** Construct an empty output buffer. */ public RAMOutputStream() { @@ -95,6 +99,7 @@ public class RAMOutputStream extends IndexOutput { bufferStart = 0; bufferLength = 0; file.setLength(0); + crc.reset(); } @Override @@ -113,12 +118,14 @@ public class RAMOutputStream extends IndexOutput { currentBufferIndex++; switchCurrentBuffer(); } + crc.update(b); currentBuffer[bufferPosition++] = b; } @Override public void writeBytes(byte[] b, int offset, int len) throws IOException { assert b != null; + crc.update(b, offset, len); while (len > 0) { if (bufferPosition == bufferLength) { currentBufferIndex++; @@ -165,5 +172,10 @@ public class RAMOutputStream extends IndexOutput { /** Returns byte usage of all buffers. */ public long sizeInBytes() { return (long) file.numBuffers() * (long) BUFFER_SIZE; - } + } + + @Override + public long getChecksum() throws IOException { + return crc.getValue(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java index 52cbf098d5b..2e183c42348 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IOUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/IOUtils.java @@ -31,23 +31,29 @@ import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; /** This class emulates the new Java 7 "Try-With-Resources" statement. * Remove once Lucene is on Java 7. 
* @lucene.internal */ public final class IOUtils { - /** - * UTF-8 charset string - * @see Charset#forName(String) - */ - public static final String UTF_8 = "UTF-8"; - /** * UTF-8 {@link Charset} instance to prevent repeated * {@link Charset#forName(String)} lookups + * @deprecated Use {@link StandardCharsets#UTF_8} instead. */ - public static final Charset CHARSET_UTF_8 = Charset.forName("UTF-8"); + @Deprecated + public static final Charset CHARSET_UTF_8 = StandardCharsets.UTF_8; + + /** + * UTF-8 charset string. + *

        Where possible, use {@link StandardCharsets#UTF_8} instead, + * as using the String constant may slow things down. + * @see StandardCharsets#UTF_8 + */ + public static final String UTF_8 = StandardCharsets.UTF_8.name(); + private IOUtils() {} // no instance /** diff --git a/lucene/core/src/java/org/apache/lucene/util/SPIClassIterator.java b/lucene/core/src/java/org/apache/lucene/util/SPIClassIterator.java index 764713c88a6..5ea20c4aaeb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/SPIClassIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/SPIClassIterator.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; @@ -91,7 +92,7 @@ public final class SPIClassIterator implements Iterator> { final InputStream in = url.openStream(); IOException priorE = null; try { - final BufferedReader reader = new BufferedReader(new InputStreamReader(in, IOUtils.CHARSET_UTF_8)); + final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); String line; while ((line = reader.readLine()) != null) { final int pos = line.indexOf('#'); diff --git a/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java b/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java index 70f5408781a..13f1b10f9ba 100644 --- a/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java @@ -1,5 +1,7 @@ package org.apache.lucene.util; +import java.nio.charset.StandardCharsets; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -88,7 +90,7 @@ package org.apache.lucene.util; /** * Class to encode java's UTF16 char[] into UTF8 byte[] * without always allocating a new byte[] as - * String.getBytes("UTF-8") does. + * String.getBytes(StandardCharsets.UTF_8) does. 
* * @lucene.internal */ @@ -292,7 +294,7 @@ public final class UnicodeUtil { private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) { try { String s1 = new String(source, offset, length); - String s2 = new String(result, 0, upto, "UTF-8"); + String s2 = new String(result, 0, upto, StandardCharsets.UTF_8); if (!s1.equals(s2)) { //System.out.println("DIFF: s1 len=" + s1.length()); //for(int i=0;i { /* if (bytes.length == 665) { - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); Util.toDot(this, w, false, false); w.close(); System.out.println("Wrote FST to out.dot"); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java index c7978fdb92b..1cfdb446227 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.compressing; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import com.carrotsearch.randomizedtesting.generators.RandomInts; @@ -81,7 +82,7 @@ public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompres public void testShortLiteralsAndMatchs() throws IOException { // literals and matchs lengths <= 15 - final byte[] decompressed = "1234562345673456745678910123".getBytes("UTF-8"); + final byte[] decompressed = "1234562345673456745678910123".getBytes(StandardCharsets.UTF_8); test(decompressed); } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java index 050770a486f..ada8380dcc6 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java @@ -23,7 +23,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.codecs.lucene46.Lucene46Codec; -import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; +import org.apache.lucene.codecs.memory.MemoryPostingsFormat; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; import org.apache.lucene.document.Document; @@ -44,7 +44,6 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import org.apache.lucene.util.TestUtil; import org.junit.Test; /** @@ -204,14 +203,14 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { public static class MockCodec extends Lucene46Codec { final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); - final PostingsFormat mockSep = new MockSepPostingsFormat(); + final PostingsFormat memory = new MemoryPostingsFormat(); @Override public PostingsFormat getPostingsFormatForField(String field) { if (field.equals("id")) { return simpleText; } else if (field.equals("content")) { - return mockSep; + return 
memory; } else { return lucene40; } diff --git a/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java b/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java index c001f82306b..5819adcc1ca 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -1,5 +1,7 @@ package org.apache.lucene.document; +import java.nio.charset.StandardCharsets; + import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.RandomIndexWriter; @@ -38,7 +40,7 @@ public class TestBinaryDocument extends LuceneTestCase { { FieldType ft = new FieldType(); ft.setStored(true); - StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8")); + StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes(StandardCharsets.UTF_8)); Field stringFldStored = new Field("stringStored", binaryValStored, ft); Document doc = new Document(); @@ -63,7 +65,7 @@ public class TestBinaryDocument extends LuceneTestCase { /** fetch the binary stored field and compare it's content with the original one */ BytesRef bytes = docFromReader.getBinaryValue("binaryStored"); assertNotNull(bytes); - String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8"); + String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8); assertTrue(binaryFldStoredTest.equals(binaryValStored)); /** fetch the string field and compare it's content with the original one */ @@ -76,7 +78,7 @@ public class TestBinaryDocument extends LuceneTestCase { } public void testCompressionTools() throws Exception { - StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8"))); + StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes(StandardCharsets.UTF_8))); StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); Document doc = new Document(); @@ -95,7 +97,7 @@ public class TestBinaryDocument extends LuceneTestCase { assertTrue(docFromReader != null); /** fetch the binary compressed field and compare it's content with the original one */ - String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8"); + String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), StandardCharsets.UTF_8); assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed)); diff --git a/lucene/core/src/test/org/apache/lucene/document/TestDocument.java b/lucene/core/src/test/org/apache/lucene/document/TestDocument.java index e9c9e5be9a7..2d5d7592a94 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestDocument.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestDocument.java @@ -19,6 +19,7 @@ package org.apache.lucene.document; import java.io.IOException; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.List; import org.apache.lucene.analysis.MockTokenizer; @@ -53,8 +54,8 @@ public class TestDocument extends LuceneTestCase { FieldType ft = new 
FieldType(); ft.setStored(true); Field stringFld = new Field("string", binaryVal, ft); - StoredField binaryFld = new StoredField("binary", binaryVal.getBytes("UTF-8")); - StoredField binaryFld2 = new StoredField("binary", binaryVal2.getBytes("UTF-8")); + StoredField binaryFld = new StoredField("binary", binaryVal.getBytes(StandardCharsets.UTF_8)); + StoredField binaryFld2 = new StoredField("binary", binaryVal2.getBytes(StandardCharsets.UTF_8)); doc.add(stringFld); doc.add(binaryFld); diff --git a/lucene/core/src/test/org/apache/lucene/document/TestField.java b/lucene/core/src/test/org/apache/lucene/document/TestField.java index 2417c65069b..62be66aa8ce 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestField.java @@ -18,6 +18,8 @@ package org.apache.lucene.document; */ import java.io.StringReader; +import java.nio.charset.StandardCharsets; + import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.Token; import org.apache.lucene.util.BytesRef; @@ -184,7 +186,7 @@ public class TestField extends LuceneTestCase { trySetBoost(field); trySetByteValue(field); - field.setBytesValue("fubar".getBytes("UTF-8")); + field.setBytesValue("fubar".getBytes(StandardCharsets.UTF_8)); field.setBytesValue(new BytesRef("baz")); trySetDoubleValue(field); trySetIntValue(field); @@ -203,7 +205,7 @@ public class TestField extends LuceneTestCase { trySetBoost(field); trySetByteValue(field); - field.setBytesValue("fubar".getBytes("UTF-8")); + field.setBytesValue("fubar".getBytes(StandardCharsets.UTF_8)); field.setBytesValue(new BytesRef("baz")); trySetDoubleValue(field); trySetIntValue(field); @@ -294,15 +296,15 @@ public class TestField extends LuceneTestCase { public void testStoredFieldBytes() throws Exception { Field fields[] = new Field[] { - new StoredField("foo", "bar".getBytes("UTF-8")), - new StoredField("foo", "bar".getBytes("UTF-8"), 0, 3), + new StoredField("foo", "bar".getBytes(StandardCharsets.UTF_8)), + new StoredField("foo", "bar".getBytes(StandardCharsets.UTF_8), 0, 3), new StoredField("foo", new BytesRef("bar")), }; for (Field field : fields) { trySetBoost(field); trySetByteValue(field); - field.setBytesValue("baz".getBytes("UTF-8")); + field.setBytesValue("baz".getBytes(StandardCharsets.UTF_8)); field.setBytesValue(new BytesRef("baz")); trySetDoubleValue(field); trySetIntValue(field); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index 27fd97ef298..50850fac27f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -1064,14 +1064,14 @@ public class TestAddIndexes extends LuceneTestCase { private static final class CustomPerFieldCodec extends Lucene46Codec { private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText"); private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); - private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep"); + private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory"); @Override public PostingsFormat getPostingsFormatForField(String field) { if (field.equals("id")) { return simpleTextFormat; } else if (field.equals("content")) { - return mockSepFormat; + return memoryFormat; } else { return defaultFormat; } diff --git 
a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java new file mode 100644 index 00000000000..7f7e9bf1197 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java @@ -0,0 +1,90 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.lucene46.Lucene46Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.store.CompoundFileDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +/** + * Test that a plain default puts CRC32 footers in all files. 
+ */ +public class TestAllFilesHaveChecksumFooter extends LuceneTestCase { + public void test() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + conf.setCodec(new Lucene46Codec()); + RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); + Document doc = new Document(); + // these fields should sometimes get term vectors, etc + Field idField = newStringField("id", "", Field.Store.NO); + Field bodyField = newTextField("body", "", Field.Store.NO); + Field dvField = new NumericDocValuesField("dv", 5); + doc.add(idField); + doc.add(bodyField); + doc.add(dvField); + for (int i = 0; i < 100; i++) { + idField.setStringValue(Integer.toString(i)); + bodyField.setStringValue(TestUtil.randomUnicodeString(random())); + riw.addDocument(doc); + if (random().nextInt(7) == 0) { + riw.commit(); + } + if (random().nextInt(20) == 0) { + riw.deleteDocuments(new Term("id", Integer.toString(i))); + } + } + riw.close(); + checkHeaders(dir); + dir.close(); + } + + private void checkHeaders(Directory dir) throws IOException { + for (String file : dir.listAll()) { + if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { + CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false); + checkHeaders(cfsDir); // recurse into cfs + cfsDir.close(); + } + IndexInput in = null; + boolean success = false; + try { + in = dir.openInput(file, newIOContext(random())); + CodecUtil.checksumEntireFile(in); + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + } + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java index defea12a415..e4c998eb861 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java @@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.lucene46.Lucene46Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -39,14 +40,15 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); conf.setCodec(new Lucene46Codec()); - // riw should sometimes create docvalues fields, etc RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); Document doc = new Document(); // these fields should sometimes get term vectors, etc Field idField = newStringField("id", "", Field.Store.NO); Field bodyField = newTextField("body", "", Field.Store.NO); + Field dvField = new NumericDocValuesField("dv", 5); doc.add(idField); doc.add(bodyField); + doc.add(dvField); for (int i = 0; i < 100; i++) { idField.setStringValue(Integer.toString(i)); bodyField.setStringValue(TestUtil.randomUnicodeString(random())); @@ -54,6 +56,10 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase { if (random().nextInt(7) == 0) { riw.commit(); } + // TODO: we should make a new format with a clean header... 
+ // if (random().nextInt(20) == 0) { + // riw.deleteDocuments(new Term("id", Integer.toString(i))); + // } } riw.close(); checkHeaders(dir); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 3ba113c417e..4c82757e2c2 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -60,6 +60,7 @@ import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Constants; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.StringHelper; @@ -76,7 +77,7 @@ import org.junit.BeforeClass; // we won't even be running the actual code, only the impostor // @SuppressCodecs("Lucene4x") // Sep codec cannot yet handle the offsets in our 4.x index! -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Lucene42", "Lucene45"}) +@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42", "Lucene45"}) public class TestBackwardsCompatibility extends LuceneTestCase { // Uncomment these cases & run them on an older Lucene version, @@ -292,13 +293,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos, false, "UTF-8")); + checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); CheckIndex.Status indexStatus = checker.checkIndex(); assertFalse(indexStatus.clean); - assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName())); + assertTrue(bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName())); dir.close(); - TestUtil.rmDir(oldIndxeDir); + TestUtil.rm(oldIndxeDir); } } @@ -597,7 +598,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { public File createIndex(String dirName, boolean doCFS, boolean fullyMerged) throws IOException { // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: File indexDir = new File("/tmp/idx", dirName); - TestUtil.rmDir(indexDir); + TestUtil.rm(indexDir); Directory dir = newFSDirectory(indexDir); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setNoCFSRatio(doCFS ? 
1.0 : 0.0); @@ -646,7 +647,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { String outputDirName = "lucene.backwardscompat0.index"; File outputDir = TestUtil.createTempDir(outputDirName); - TestUtil.rmDir(outputDir); + TestUtil.rm(outputDir); try { Directory dir = newFSDirectory(outputDir); @@ -704,7 +705,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } dir.close(); } finally { - TestUtil.rmDir(outputDir); + TestUtil.rm(outputDir); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBagOfPositions.java b/lucene/core/src/test/org/apache/lucene/index/TestBagOfPositions.java index 4df23640c64..70bdfa54794 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestBagOfPositions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestBagOfPositions.java @@ -87,7 +87,7 @@ public class TestBagOfPositions extends LuceneTestCase { if (options == 0) { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); // we dont actually need positions fieldType.setStoreTermVectors(true); // but enforce term vectors when we do this so we check SOMETHING - } else if (options == 1 && !doesntSupportOffsets.contains(TestUtil.getPostingsFormat("field"))) { + } else if (options == 1) { fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); } // else just positions diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java index 9d13b1aebbe..67b719e7fea 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java @@ -20,9 +20,11 @@ package org.apache.lucene.index; import java.io.IOException; import java.io.ByteArrayOutputStream; import java.io.PrintStream; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.ArrayList; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.CannedTokenStream; @@ -54,12 +56,12 @@ public class TestCheckIndex extends LuceneTestCase { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos, false, "UTF-8")); + checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); if (VERBOSE) checker.setInfoStream(System.out); CheckIndex.Status indexStatus = checker.checkIndex(); if (indexStatus.clean == false) { System.out.println("CheckIndex failed"); - System.out.println(bos.toString("UTF-8")); + System.out.println(bos.toString(IOUtils.UTF_8)); fail(); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 5b33629d3b7..21f1c6d0f33 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -29,7 +29,6 @@ import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.lucene40.Lucene40RWCodec; import org.apache.lucene.codecs.lucene41.Lucene41RWCodec; import org.apache.lucene.codecs.lucene42.Lucene42RWCodec; -import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; @@ -336,54 +335,6 @@ public class TestCodecs extends LuceneTestCase { dir.close(); } - public void 
testSepPositionAfterMerge() throws IOException { - final Directory dir = newDirectory(); - final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random())); - config.setMergePolicy(newLogMergePolicy()); - config.setCodec(TestUtil.alwaysPostingsFormat(new MockSepPostingsFormat())); - final IndexWriter writer = new IndexWriter(dir, config); - - try { - final PhraseQuery pq = new PhraseQuery(); - pq.add(new Term("content", "bbb")); - pq.add(new Term("content", "ccc")); - - final Document doc = new Document(); - FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); - customType.setOmitNorms(true); - doc.add(newField("content", "aaa bbb ccc ddd", customType)); - - // add document and force commit for creating a first segment - writer.addDocument(doc); - writer.commit(); - - ScoreDoc[] results = this.search(writer, pq, 5); - assertEquals(1, results.length); - assertEquals(0, results[0].doc); - - // add document and force commit for creating a second segment - writer.addDocument(doc); - writer.commit(); - - // at this point, there should be at least two segments - results = this.search(writer, pq, 5); - assertEquals(2, results.length); - assertEquals(0, results[0].doc); - - writer.forceMerge(1); - - // optimise to merge the segments. - results = this.search(writer, pq, 5); - assertEquals(2, results.length); - assertEquals(0, results[0].doc); - } - finally { - writer.close(); - dir.close(); - } - } - private ScoreDoc[] search(final IndexWriter writer, final Query q, final int n) throws IOException { final IndexReader reader = writer.getReader(); final IndexSearcher searcher = newSearcher(reader); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 2a786f42cc8..80956f99dbe 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -25,6 +25,7 @@ import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; @@ -89,7 +90,7 @@ public class TestDoc extends LuceneTestCase { File f = new File(workDir, name); if (f.exists()) f.delete(); - fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8"); + fw = new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8); pw = new PrintWriter(fw); pw.println(text); return f; @@ -200,7 +201,7 @@ public class TestDoc extends LuceneTestCase { { File file = new File(workDir, fileName); Document doc = new Document(); - InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8"); + InputStreamReader is = new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8); doc.add(new TextField("contents", is)); writer.addDocument(doc); writer.commit(); @@ -221,7 +222,7 @@ public class TestDoc extends LuceneTestCase { SegmentMerger merger = new SegmentMerger(Arrays.asList(r1, r2), si, InfoStream.getDefault(), trackingDir, - MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context); + MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); MergeState mergeState = merger.merge(); r1.close(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocInverterPerFieldErrorInfo.java b/lucene/core/src/test/org/apache/lucene/index/TestDocInverterPerFieldErrorInfo.java index 3df1ebc7db4..c2963c97e68 100644 --- 
a/lucene/core/src/test/org/apache/lucene/index/TestDocInverterPerFieldErrorInfo.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocInverterPerFieldErrorInfo.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.PrintStreamInfoStream; import org.junit.Test; @@ -70,7 +71,7 @@ public class TestDocInverterPerFieldErrorInfo extends LuceneTestCase { IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); - PrintStream infoPrintStream = new PrintStream(infoBytes, true, "utf-8"); + PrintStream infoPrintStream = new PrintStream(infoBytes, true, IOUtils.UTF_8); PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream); c.setInfoStream(printStreamInfoStream); writer = new IndexWriter(dir, c); @@ -81,7 +82,7 @@ public class TestDocInverterPerFieldErrorInfo extends LuceneTestCase { fail("Failed to fail."); } catch(BadNews badNews) { infoPrintStream.flush(); - String infoStream = new String(infoBytes.toByteArray(), "utf-8"); + String infoStream = new String(infoBytes.toByteArray(), IOUtils.UTF_8); assertTrue(infoStream.contains("distinctiveFieldName")); } @@ -95,7 +96,7 @@ public class TestDocInverterPerFieldErrorInfo extends LuceneTestCase { IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); - PrintStream infoPrintStream = new PrintStream(infoBytes, true, "utf-8"); + PrintStream infoPrintStream = new PrintStream(infoBytes, true, IOUtils.UTF_8); PrintStreamInfoStream printStreamInfoStream = new PrintStreamInfoStream(infoPrintStream); c.setInfoStream(printStreamInfoStream); writer = new IndexWriter(dir, c); @@ -107,7 +108,7 @@ public class TestDocInverterPerFieldErrorInfo extends LuceneTestCase { fail("Unwanted exception"); } infoPrintStream.flush(); - String infoStream = new String(infoBytes.toByteArray(), "utf-8"); + String infoStream = new String(infoBytes.toByteArray(), IOUtils.UTF_8); assertFalse(infoStream.contains("boringFieldName")); writer.close(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index caec565055b..f5e3dbf3e3f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -43,6 +44,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -1173,10 +1175,10 @@ public class TestIndexWriterDelete extends LuceneTestCase { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new 
CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos, false, "UTF-8"), false); + checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false); CheckIndex.Status indexStatus = checker.checkIndex(null); assertTrue(indexStatus.clean); - String s = bos.toString("UTF-8"); + String s = bos.toString(IOUtils.UTF_8); // Segment should have deletions: assertTrue(s.contains("has deletions")); @@ -1185,10 +1187,10 @@ public class TestIndexWriterDelete extends LuceneTestCase { w.close(); bos = new ByteArrayOutputStream(1024); - checker.setInfoStream(new PrintStream(bos, false, "UTF-8"), false); + checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false); indexStatus = checker.checkIndex(null); assertTrue(indexStatus.clean); - s = bos.toString("UTF-8"); + s = bos.toString(IOUtils.UTF_8); assertFalse(s.contains("has deletions")); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java index be3170d8660..1c202cfa7bd 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Iterator; import java.util.Random; @@ -181,7 +182,7 @@ public class TestIndexWriterUnicode extends LuceneTestCase { UnicodeUtil.UTF16toUTF8(buffer, 0, 20, utf8); if (!hasIllegal) { - byte[] b = new String(buffer, 0, 20).getBytes("UTF-8"); + byte[] b = new String(buffer, 0, 20).getBytes(StandardCharsets.UTF_8); assertEquals(b.length, utf8.length); for(int i=0;iasList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, - MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random())); + MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true); MergeState mergeState = merger.merge(); int docsMerged = mergeState.segmentInfo.getDocCount(); assertTrue(docsMerged == 2); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index f78f6f84bf9..e490518412e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -60,6 +60,7 @@ import org.apache.lucene.search.FieldCache.Longs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.TestUtil; @@ -139,7 +140,7 @@ public class TestFieldCache extends LuceneTestCase { try { FieldCache cache = FieldCache.DEFAULT; ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - cache.setInfoStream(new PrintStream(bos, false, "UTF-8")); + cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); cache.getDoubles(reader, "theDouble", false); cache.getFloats(reader, "theDouble", new FieldCache.FloatParser() { @Override @@ -151,7 +152,7 @@ public class TestFieldCache extends LuceneTestCase { return NumericUtils.sortableIntToFloat((int) NumericUtils.prefixCodedToLong(term)); } }, false); - assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1); + assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1); } 
finally { FieldCache.DEFAULT.purgeAllCaches(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 123b256ba70..1e918d56d3c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.Collection; import org.apache.lucene.analysis.*; @@ -249,7 +250,7 @@ public class TestPositionIncrement extends LuceneTestCase { for (byte[] bytes : payloads) { count++; if (VERBOSE) { - System.out.println(" payload: " + new String(bytes, "UTF-8")); + System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8)); } } } @@ -276,7 +277,7 @@ public class TestPositionIncrement extends LuceneTestCase { Collection pls = psu.getPayloadsForQuery(snq); count = pls.size(); for (byte[] bytes : pls) { - String s = new String(bytes, "UTF-8"); + String s = new String(bytes, StandardCharsets.UTF_8); //System.out.println(s); sawZero |= s.equals("pos: 0"); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java index 53314e84b42..d5ca61b430e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -80,7 +81,7 @@ public class TestBasics extends LuceneTestCase { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8"))); + payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8))); pos++; return true; } else { @@ -482,7 +483,7 @@ public class TestBasics extends LuceneTestCase { @Test public void testSpanPayloadCheck() throws Exception { SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); - BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8")); + BytesRef pay = new BytesRef(("pos: " + 5).getBytes(StandardCharsets.UTF_8)); SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes)); checkHits(query, new int[] {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995}); @@ -497,8 +498,8 @@ public class TestBasics extends LuceneTestCase { clauses[0] = term1; clauses[1] = term2; snq = new SpanNearQuery(clauses, 0, true); - pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); - pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); + pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8)); + pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8)); list = new ArrayList<>(); list.add(pay.bytes); list.add(pay2.bytes); @@ -510,9 +511,9 
@@ public class TestBasics extends LuceneTestCase { clauses[1] = term2; clauses[2] = new SpanTermQuery(new Term("field", "five")); snq = new SpanNearQuery(clauses, 0, true); - pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); - pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); - BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8")); + pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8)); + pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8)); + BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes(StandardCharsets.UTF_8)); list = new ArrayList<>(); list.add(pay.bytes); list.add(pay2.bytes); @@ -541,10 +542,10 @@ public class TestBasics extends LuceneTestCase { checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); Collection payloads = new ArrayList<>(); - BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); - BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); - BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8")); - BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8")); + BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8)); + BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8)); + BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8)); + BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8)); payloads.add(pay.bytes); payloads.add(pay2.bytes); payloads.add(pay3.bytes); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index 485430b5ed7..ecd16c7d132 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -276,7 +277,7 @@ public class TestPayloadSpans extends LuceneTestCase { Collection payloads = spans.getPayload(); for (final byte [] payload : payloads) { - payloadSet.add(new String(payload, "UTF-8")); + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); } } } @@ -311,7 +312,7 @@ public class TestPayloadSpans extends LuceneTestCase { while (spans.next()) { Collection payloads = spans.getPayload(); for (final byte[] payload : payloads) { - payloadSet.add(new String(payload, "UTF-8")); + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); } } } @@ -347,7 +348,7 @@ public class TestPayloadSpans extends LuceneTestCase { Collection payloads = spans.getPayload(); for (final byte [] payload : payloads) { - payloadSet.add(new String(payload, "UTF-8")); + payloadSet.add(new String(payload, StandardCharsets.UTF_8)); } } } @@ -382,7 +383,7 @@ public class TestPayloadSpans extends LuceneTestCase { if(VERBOSE) { System.out.println("Num payloads:" + payloads.size()); for (final byte [] bytes : payloads) { - System.out.println(new String(bytes, "UTF-8")); + System.out.println(new String(bytes, StandardCharsets.UTF_8)); } } reader.close(); @@ -455,7 +456,7 @@ public class TestPayloadSpans extends LuceneTestCase { System.out.println("payloads for span:" + payload.size()); for (final byte [] bytes : payload) { System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " - + new String(bytes, 
"UTF-8")); + + new String(bytes, StandardCharsets.UTF_8)); } } diff --git a/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java b/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java new file mode 100644 index 00000000000..c7915d40b96 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/store/TestBufferedChecksum.java @@ -0,0 +1,68 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.zip.CRC32; +import java.util.zip.Checksum; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestBufferedChecksum extends LuceneTestCase { + + public void testSimple() { + Checksum c = new BufferedChecksum(new CRC32()); + c.update(1); + c.update(2); + c.update(3); + assertEquals(1438416925L, c.getValue()); + } + + public void testRandom() { + Checksum c1 = new CRC32(); + Checksum c2 = new BufferedChecksum(new CRC32()); + int iterations = atLeast(10000); + for (int i = 0; i < iterations; i++) { + switch(random().nextInt(4)) { + case 0: + // update(byte[], int, int) + int length = random().nextInt(1024); + byte bytes[] = new byte[length]; + random().nextBytes(bytes); + c1.update(bytes, 0, bytes.length); + c2.update(bytes, 0, bytes.length); + break; + case 1: + // update(int) + int b = random().nextInt(256); + c1.update(b); + c2.update(b); + break; + case 2: + // reset() + c1.reset(); + c2.reset(); + break; + case 3: + // getValue() + assertEquals(c1.getValue(), c2.getValue()); + break; + } + } + assertEquals(c1.getValue(), c2.getValue()); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java index 577315be3df..42941d419b2 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestFilterDirectory.java @@ -30,12 +30,13 @@ public class TestFilterDirectory extends LuceneTestCase { public void testOverrides() throws Exception { // verify that all methods of Directory are overridden by FilterDirectory, // except those under the 'exclude' list - Set exclude = new HashSet<>(); - exclude.add("copy"); - exclude.add("createSlicer"); + Set exclude = new HashSet<>(); + exclude.add(Directory.class.getMethod("copy", Directory.class, String.class, String.class, IOContext.class)); + exclude.add(Directory.class.getMethod("createSlicer", String.class, IOContext.class)); + exclude.add(Directory.class.getMethod("openChecksumInput", String.class, IOContext.class)); for (Method m : FilterDirectory.class.getMethods()) { if (m.getDeclaringClass() == Directory.class) { - assertTrue("method " + m.getName() + " not overridden!", exclude.contains(m.getName())); + assertTrue("method " + m.getName() 
+ " not overridden!", exclude.contains(m)); } } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java b/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java index 8dd4144de98..d8154169d2d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java @@ -17,8 +17,11 @@ package org.apache.lucene.util; * limitations under the License. */ +import java.util.concurrent.CountDownLatch; + import org.apache.lucene.util.junitcompat.WithNestedTests; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.runner.Description; @@ -27,8 +30,13 @@ import org.junit.runner.Result; import org.junit.runner.notification.Failure; import org.junit.runner.notification.RunListener; -import com.carrotsearch.randomizedtesting.SysGlobals; import com.carrotsearch.randomizedtesting.annotations.Repeat; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence; import com.carrotsearch.randomizedtesting.rules.SystemPropertiesInvariantRule; import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; @@ -66,48 +74,118 @@ public class TestMaxFailuresRule extends WithNestedTests { @Test public void testMaxFailures() { - TestRuleIgnoreAfterMaxFailures newRule = new TestRuleIgnoreAfterMaxFailures(2); - TestRuleIgnoreAfterMaxFailures prevRule = LuceneTestCase.replaceMaxFailureRule(newRule); - System.clearProperty(SysGlobals.SYSPROP_ITERATIONS()); - try { - JUnitCore core = new JUnitCore(); - final StringBuilder results = new StringBuilder(); - core.addListener(new RunListener() { - char lastTest; + LuceneTestCase.replaceMaxFailureRule(new TestRuleIgnoreAfterMaxFailures(2)); + JUnitCore core = new JUnitCore(); + final StringBuilder results = new StringBuilder(); + core.addListener(new RunListener() { + char lastTest; - @Override - public void testStarted(Description description) throws Exception { - lastTest = 'S'; // success. - } + @Override + public void testStarted(Description description) throws Exception { + lastTest = 'S'; // success. + } - @Override - public void testAssumptionFailure(Failure failure) { - lastTest = 'A'; // assumption failure. - } + @Override + public void testAssumptionFailure(Failure failure) { + lastTest = 'A'; // assumption failure. 
+ } - @Override - public void testFailure(Failure failure) throws Exception { - lastTest = 'F'; // failure - } + @Override + public void testFailure(Failure failure) throws Exception { + lastTest = 'F'; // failure + } - @Override - public void testFinished(Description description) throws Exception { - results.append(lastTest); - } - }); + @Override + public void testFinished(Description description) throws Exception { + results.append(lastTest); + } + }); - Result result = core.run(Nested.class); - Assert.assertEquals(500, result.getRunCount()); - Assert.assertEquals(0, result.getIgnoreCount()); - Assert.assertEquals(2, result.getFailureCount()); + Result result = core.run(Nested.class); + Assert.assertEquals(500, result.getRunCount()); + Assert.assertEquals(0, result.getIgnoreCount()); + Assert.assertEquals(2, result.getFailureCount()); - // Make sure we had exactly two failures followed by assumption-failures - // resulting from ignored tests. - Assert.assertTrue(results.toString(), - results.toString().matches("(S*F){2}A+")); + // Make sure we had exactly two failures followed by assumption-failures + // resulting from ignored tests. + Assert.assertTrue(results.toString(), + results.toString().matches("(S*F){2}A+")); + } - } finally { - LuceneTestCase.replaceMaxFailureRule(prevRule); + @ThreadLeakZombies(Consequence.IGNORE_REMAINING_TESTS) + @ThreadLeakAction({ThreadLeakAction.Action.WARN}) + @ThreadLeakScope(Scope.TEST) + @ThreadLeakLingering(linger = 500) + public static class Nested2 extends WithNestedTests.AbstractNestedTest { + public static final int TOTAL_ITERS = 10; + public static CountDownLatch die; + public static Thread zombie; + public static int testNum; + + @BeforeClass + public static void setup() { + assert zombie == null; + die = new CountDownLatch(1); + testNum = 0; + } + + @Repeat(iterations = TOTAL_ITERS) + public void testLeaveZombie() { + if (++testNum == 2) { + zombie = new Thread() { + @Override + public void run() { + while (true) { + try { + die.await(); + return; + } catch (Exception e) { /* ignore */ } + } + } + }; + zombie.start(); + } } } + + @Test + public void testZombieThreadFailures() throws Exception { + LuceneTestCase.replaceMaxFailureRule(new TestRuleIgnoreAfterMaxFailures(1)); + JUnitCore core = new JUnitCore(); + final StringBuilder results = new StringBuilder(); + core.addListener(new RunListener() { + char lastTest; + + @Override + public void testStarted(Description description) throws Exception { + lastTest = 'S'; // success. + } + + @Override + public void testAssumptionFailure(Failure failure) { + lastTest = 'A'; // assumption failure. 
+ } + + @Override + public void testFailure(Failure failure) throws Exception { + lastTest = 'F'; // failure + System.out.println(failure.getMessage()); + } + + @Override + public void testFinished(Description description) throws Exception { + results.append(lastTest); + } + }); + + Result result = core.run(Nested2.class); + if (Nested2.die != null) { + Nested2.die.countDown(); + Nested2.zombie.join(); + } + + super.prevSysOut.println(results.toString()); + Assert.assertEquals(Nested2.TOTAL_ITERS, result.getRunCount()); + Assert.assertEquals(results.toString(), "SFAAAAAAAA", results.toString()); + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestOfflineSorter.java b/lucene/core/src/test/org/apache/lucene/util/TestOfflineSorter.java index 425059a267b..221d361d570 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestOfflineSorter.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestOfflineSorter.java @@ -21,6 +21,7 @@ import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -43,14 +44,14 @@ public class TestOfflineSorter extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); tempDir = TestUtil.createTempDir("mergesort"); - TestUtil.rmDir(tempDir); + TestUtil.rm(tempDir); tempDir.mkdirs(); } @Override public void tearDown() throws Exception { if (tempDir != null) - TestUtil.rmDir(tempDir); + TestUtil.rm(tempDir); super.tearDown(); } @@ -60,7 +61,7 @@ public class TestOfflineSorter extends LuceneTestCase { public void testSingleLine() throws Exception { checkSort(new OfflineSorter(), new byte [][] { - "Single line only.".getBytes("UTF-8") + "Single line only.".getBytes(StandardCharsets.UTF_8) }); } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java index e435fe6f6ae..be40c6c00f1 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java @@ -17,6 +17,7 @@ package org.apache.lucene.util.automaton; * limitations under the License. 
*/ +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -56,7 +57,7 @@ public class TestDeterminizeLexicon extends LuceneTestCase { } final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex); for (String s : terms) { - byte bytes[] = s.getBytes("UTF-8"); + byte bytes[] = s.getBytes(StandardCharsets.UTF_8); assertTrue(lexByte.run(bytes, 0, bytes.length)); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java index 68c75a1f8cb..d5faa4dfef9 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; +import java.nio.charset.StandardCharsets; import java.util.Random; public class TestUTF32ToUTF8 extends LuceneTestCase { @@ -184,7 +185,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase { assertTrue(cra.run(input)); - byte[] bytes = input.getBytes("UTF-8"); + byte[] bytes = input.getBytes(StandardCharsets.UTF_8); assertTrue(bra.run(bytes, 0, bytes.length)); // this one fails! } @@ -197,7 +198,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase { assertTrue(cra.run(input)); - byte[] bytes = input.getBytes("UTF-8"); + byte[] bytes = input.getBytes(StandardCharsets.UTF_8); assertTrue(bra.run(bytes, 0, bytes.length)); } @@ -232,7 +233,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase { throw e; } } - byte bytes[] = string.getBytes("UTF-8"); + byte bytes[] = string.getBytes(StandardCharsets.UTF_8); assertEquals(cra.run(string), bra.run(bytes, 0, bytes.length)); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index b8966e1427a..96d032fe758 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -63,6 +63,7 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -474,7 +475,7 @@ public class TestFSTs extends LuceneTestCase { protected abstract T getOutput(IntsRef input, int ord) throws IOException; public void run(int limit, boolean verify, boolean verifyByOutput) throws IOException { - BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536); + BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), StandardCharsets.UTF_8), 65536); try { final IntsRef intsRef = new IntsRef(10); long tStart = System.currentTimeMillis(); @@ -517,7 +518,7 @@ public class TestFSTs extends LuceneTestCase { System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes()); if (fst.getNodeCount() < 100) { - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); Util.toDot(fst, w, false, false); w.close(); System.out.println("Wrote FST to out.dot"); @@ -544,7 +545,7 @@ public 
class TestFSTs extends LuceneTestCase { while(true) { for(int iter=0;iter<2;iter++) { is.close(); - is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536); + is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), StandardCharsets.UTF_8), 65536); ord = 0; tStart = System.currentTimeMillis(); diff --git a/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java b/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java index 48792f175e6..a78c32b70c2 100644 --- a/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java +++ b/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java @@ -20,8 +20,11 @@ package org.apache.lucene.util.junitcompat; import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.List; +import org.apache.lucene.util.FailureMarker; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures; import org.apache.lucene.util.TestRuleIgnoreTestSuites; @@ -35,6 +38,9 @@ import org.junit.Rule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; +import com.carrotsearch.randomizedtesting.RandomizedRunner; +import com.carrotsearch.randomizedtesting.RandomizedTest; +import com.carrotsearch.randomizedtesting.SysGlobals; import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import com.carrotsearch.randomizedtesting.rules.TestRuleAdapter; @@ -74,22 +80,36 @@ public abstract class WithNestedTests { private TestRuleIgnoreAfterMaxFailures prevRule; protected void before() throws Throwable { - String filter = System.getProperty("tests.filter"); - if (filter != null && !filter.trim().isEmpty()) { - // We're running with a complex test filter. This will affect nested tests anyway - // so ignore them. + if (!isPropertyEmpty(SysGlobals.SYSPROP_TESTFILTER()) || + !isPropertyEmpty(SysGlobals.SYSPROP_TESTCLASS()) || + !isPropertyEmpty(SysGlobals.SYSPROP_TESTMETHOD()) || + !isPropertyEmpty(SysGlobals.SYSPROP_ITERATIONS())) { + // We're running with a complex test filter that is properly handled by classes + // which are executed by RandomizedRunner. The "outer" classes testing LuceneTestCase + // itself are executed by the default JUnit runner and would be always executed. + // We thus always skip execution if any filtering is detected. Assume.assumeTrue(false); } + // Check zombie threads from previous suites. Don't run if zombies are around. 
+ RandomizedTest.assumeFalse(RandomizedRunner.hasZombieThreads()); + TestRuleIgnoreAfterMaxFailures newRule = new TestRuleIgnoreAfterMaxFailures(Integer.MAX_VALUE); prevRule = LuceneTestCase.replaceMaxFailureRule(newRule); + RandomizedTest.assumeFalse(FailureMarker.hadFailures()); } protected void afterAlways(List errors) throws Throwable { if (prevRule != null) { LuceneTestCase.replaceMaxFailureRule(prevRule); } + FailureMarker.resetFailures(); } + + private boolean isPropertyEmpty(String propertyName) { + String value = System.getProperty(propertyName); + return value == null || value.trim().isEmpty(); + } }); /** @@ -121,14 +141,15 @@ public abstract class WithNestedTests { try { sysout = new ByteArrayOutputStream(); - System.setOut(new PrintStream(sysout, true, "UTF-8")); + System.setOut(new PrintStream(sysout, true, IOUtils.UTF_8)); syserr = new ByteArrayOutputStream(); - System.setErr(new PrintStream(syserr, true, "UTF-8")); + System.setErr(new PrintStream(syserr, true, IOUtils.UTF_8)); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } + FailureMarker.resetFailures(); System.setProperty(TestRuleIgnoreTestSuites.PROPERTY_RUN_NESTED, "true"); } @@ -146,20 +167,12 @@ public abstract class WithNestedTests { protected String getSysOut() { Assert.assertTrue(suppressOutputStreams); System.out.flush(); - try { - return new String(sysout.toByteArray(), "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + return new String(sysout.toByteArray(), StandardCharsets.UTF_8); } protected String getSysErr() { Assert.assertTrue(suppressOutputStreams); System.err.flush(); - try { - return new String(syserr.toByteArray(), "UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + return new String(syserr.toByteArray(), StandardCharsets.UTF_8); } } diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 5f1e48d66e8..cfbfc43a8ab 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -38,6 +38,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Date; /** Index all text files under a directory. @@ -191,7 +192,7 @@ public class IndexFiles { // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
- doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); + doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): diff --git a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java index 621ecf484c7..95b5975f4ec 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Date; import org.apache.lucene.analysis.Analyzer; @@ -95,9 +96,9 @@ public class SearchFiles { BufferedReader in = null; if (queries != null) { - in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); + in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), StandardCharsets.UTF_8)); } else { - in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); + in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(Version.LUCENE_50, field, analyzer); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java index bda9d3f1dad..79983eab4f5 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java @@ -21,9 +21,11 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Enumeration; import java.util.Properties; import java.util.StringTokenizer; + import javax.servlet.RequestDispatcher; import javax.servlet.ServletConfig; import javax.servlet.ServletException; @@ -49,7 +51,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; /** @@ -136,7 +137,7 @@ public class FormBasedXmlQueryDemo extends HttpServlet { IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); - BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8)); + BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8)); String line = br.readLine(); final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED); textNoNorms.setOmitNorms(true); diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java index 0e3f730bb13..8137057599a 100644 --- a/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java +++ b/lucene/expressions/src/java/org/apache/lucene/expressions/js/JavascriptCompiler.java @@ -22,6 +22,7 @@ 
import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.lang.reflect.Modifier; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.Arrays; import java.util.Collections; @@ -503,7 +504,7 @@ public class JavascriptCompiler { try { final Properties props = new Properties(); try (Reader in = IOUtils.getDecodingReader(JavascriptCompiler.class, - JavascriptCompiler.class.getSimpleName() + ".properties", IOUtils.CHARSET_UTF_8)) { + JavascriptCompiler.class.getSimpleName() + ".properties", StandardCharsets.UTF_8)) { props.load(in); } for (final String call : props.stringPropertyNames()) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java b/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java index ee21a8c211b..eaf78f7025d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java @@ -167,6 +167,7 @@ public class SlowRAMDirectory extends RAMDirectory { @Override public void flush() throws IOException { io.flush(); } @Override public long getFilePointer() { return io.getFilePointer(); } @Override public long length() throws IOException { return io.length(); } + @Override public long getChecksum() throws IOException { return io.getChecksum(); } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java index a63a27a6259..6f85c6ab12d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java @@ -132,8 +132,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // Smoke test PrintTaxonomyStats: ByteArrayOutputStream bos = new ByteArrayOutputStream(); - PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, "UTF-8"), true); - String result = bos.toString("UTF-8"); + PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, IOUtils.UTF_8), true); + String result = bos.toString(IOUtils.UTF_8); assertTrue(result.indexOf("/Author: 4 immediate children; 5 total categories") != -1); assertTrue(result.indexOf("/Publish Date: 3 immediate children; 12 total categories") != -1); // Make sure at least a few nodes of the tree came out: diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java index 63aaeb42c1b..0a86e58ad84 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCharBlockArray.java @@ -8,11 +8,11 @@ import java.io.FileOutputStream; import java.nio.ByteBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil; + import org.junit.Test; /* @@ -47,7 +47,7 @@ public class TestCharBlockArray extends FacetTestCase { int size = 1 + random().nextInt(50); // This test is turning random bytes into a string, // this is asking for trouble. 
- CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString(); @@ -60,7 +60,7 @@ public class TestCharBlockArray extends FacetTestCase { int size = 1 + random().nextInt(50); // This test is turning random bytes into a string, // this is asking for trouble. - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString(); @@ -73,7 +73,7 @@ public class TestCharBlockArray extends FacetTestCase { int size = 1 + random().nextInt(50); // This test is turning random bytes into a string, // this is asking for trouble. - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); String s = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCompactLabelToOrdinal.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCompactLabelToOrdinal.java index b5805e7e4ee..c29c849e56d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCompactLabelToOrdinal.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/writercache/TestCompactLabelToOrdinal.java @@ -4,15 +4,15 @@ import java.io.File; import java.nio.ByteBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.Random; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil; + import org.junit.Test; /* @@ -53,7 +53,7 @@ public class TestCompactLabelToOrdinal extends FacetTestCase { // This test is turning random bytes into a string, // this is asking for trouble. 
- CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onUnmappableCharacter(CodingErrorAction.REPLACE) .onMalformedInput(CodingErrorAction.REPLACE); uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString(); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 706fcc63161..40c1c28183c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -20,12 +20,14 @@ package org.apache.lucene.search.highlight; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.StringTokenizer; + import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -1512,7 +1514,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // now an ugly built of XML parsing to test the snippet is encoded OK DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); - org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes("UTF-8"))); + org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8))); Element root = doc.getDocumentElement(); NodeList nodes = root.getElementsByTagName("body"); Element body = (Element) nodes.item(0); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java index 29210a0223c..3ba2fef5f3b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestMultiTermHighlighting.java @@ -51,13 +51,11 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /** * Some tests that override {@link PostingsHighlighter#getIndexAnalyzer} to * highlight wilcard, fuzzy, etc queries. 
*/ -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) public class TestMultiTermHighlighting extends LuceneTestCase { public void testWildcards() throws Exception { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java index 9bb6f5349cb..69f4d9deee0 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java @@ -20,6 +20,7 @@ package org.apache.lucene.search.postingshighlight; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.BreakIterator; import java.util.Arrays; import java.util.Map; @@ -48,10 +49,8 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) public class TestPostingsHighlighter extends LuceneTestCase { public void testBasics() throws Exception { @@ -489,7 +488,7 @@ public class TestPostingsHighlighter extends LuceneTestCase { public void testCambridgeMA() throws Exception { BufferedReader r = new BufferedReader(new InputStreamReader( - this.getClass().getResourceAsStream("CambridgeMA.utf8"), "UTF-8")); + this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8)); String text = r.readLine(); r.close(); Directory dir = newDirectory(); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java index 3af9eb1cc38..7939b03409e 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java @@ -43,10 +43,8 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) public class TestPostingsHighlighterRanking extends LuceneTestCase { /** * indexes a bunch of gibberish, and then highlights top(n). 
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 83c003d8237..93bbee7e01d 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -8,7 +8,7 @@ /cglib/cglib-nodep = 2.2 /com.adobe.xmp/xmpcore = 5.1.2 -com.carrotsearch.randomizedtesting.version = 2.1.1 +com.carrotsearch.randomizedtesting.version = 2.1.3 /com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version} /com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version} diff --git a/lucene/licenses/junit4-ant-2.1.1.jar.sha1 b/lucene/licenses/junit4-ant-2.1.1.jar.sha1 deleted file mode 100644 index 4340e4c8609..00000000000 --- a/lucene/licenses/junit4-ant-2.1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a8a7371e11a8b3a4a3eeea81ad3cedafe3e3550e diff --git a/lucene/licenses/junit4-ant-2.1.3.jar.sha1 b/lucene/licenses/junit4-ant-2.1.3.jar.sha1 new file mode 100644 index 00000000000..c2d6fa49fa8 --- /dev/null +++ b/lucene/licenses/junit4-ant-2.1.3.jar.sha1 @@ -0,0 +1 @@ +8636804644d4ae3874f0efaa98978887e171cd55 diff --git a/lucene/licenses/randomizedtesting-runner-2.1.1.jar.sha1 b/lucene/licenses/randomizedtesting-runner-2.1.1.jar.sha1 deleted file mode 100644 index 2923eedf9fe..00000000000 --- a/lucene/licenses/randomizedtesting-runner-2.1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5908c4e714dab40ccc892993a21537c7c0d6210c diff --git a/lucene/licenses/randomizedtesting-runner-2.1.3.jar.sha1 b/lucene/licenses/randomizedtesting-runner-2.1.3.jar.sha1 new file mode 100644 index 00000000000..5da2ec2946a --- /dev/null +++ b/lucene/licenses/randomizedtesting-runner-2.1.3.jar.sha1 @@ -0,0 +1 @@ +d340caee99857ed0384681eea6219a4d937e7ee4 diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 87f33dc809f..db79ff021fd 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -794,6 +794,11 @@ public class MemoryIndex { return null; } + @Override + public void checkIntegrity() throws IOException { + // no-op + } + private class MemoryFields extends Fields { @Override public Iterator iterator() { diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index 093c0c36dc6..74318c81ecc 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -21,6 +21,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; @@ -100,7 +101,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { private Set readQueries(String resource) throws IOException { Set queries = new HashSet<>(); InputStream stream = getClass().getResourceAsStream(resource); - BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); String line = null; while ((line = reader.readLine()) != null) { line = line.trim(); diff --git a/lucene/misc/build.xml b/lucene/misc/build.xml index d49ccf77e19..b5ee7b2a5f4 100644 --- a/lucene/misc/build.xml +++ 
b/lucene/misc/build.xml @@ -44,7 +44,7 @@ - + @@ -54,7 +54,7 @@ - + diff --git a/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java b/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java index 817b5620a89..4aee314ac8e 100644 --- a/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java +++ b/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java @@ -240,6 +240,11 @@ public class NativeUnixDirectory extends FSDirectory { public long length() { return fileLength + bufferPos; } + + @Override + public long getChecksum() throws IOException { + throw new UnsupportedOperationException("this directory currently does not work at all!"); + } @Override public void close() throws IOException { diff --git a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java index 49c036d5832..797a33d1e7f 100644 --- a/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java +++ b/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java @@ -172,12 +172,7 @@ public abstract class SorterTestBase extends LuceneTestCase { doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES)); doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO)); positions.setId(id); - if (doesntSupportOffsets.contains(TestUtil.getPostingsFormat(DOC_POSITIONS_FIELD))) { - // codec doesnt support offsets: just index positions for the field - doc.add(new Field(DOC_POSITIONS_FIELD, positions, TextField.TYPE_NOT_STORED)); - } else { - doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE)); - } + doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE)); doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id)); TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO); norms.setBoost(Float.intBitsToFloat(id)); @@ -264,10 +259,8 @@ public abstract class SorterTestBase extends LuceneTestCase { assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); for (int i = 0; i < freq; i++) { assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); - if (!doesntSupportOffsets.contains(TestUtil.getPostingsFormat(DOC_POSITIONS_FIELD))) { - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); - } + assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); + assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); } } @@ -284,10 +277,8 @@ public abstract class SorterTestBase extends LuceneTestCase { assertEquals("incorrect freq for doc=" + doc, sortedValues[doc].intValue() / 10 + 1, freq); for (int i = 0; i < freq; i++) { assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition()); - if (!doesntSupportOffsets.contains(TestUtil.getPostingsFormat(DOC_POSITIONS_FIELD))) { - assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); - assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); - } + assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset()); + assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset()); assertEquals("incorrect 
payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString())); } } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java b/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java index 728ac595cb3..68884892b97 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java @@ -60,7 +60,6 @@ public class CommonTermsQueryTest extends LuceneTestCase { public void testBasics() throws IOException { Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random()); - analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer); String[] docs = new String[] {"this is the end of the world right", "is this it or maybe not", @@ -191,7 +190,6 @@ public class CommonTermsQueryTest extends LuceneTestCase { public void testMinShouldMatch() throws IOException { Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random()); - analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer); String[] docs = new String[] {"this is the end of the world right", "is this it or maybe not", @@ -351,7 +349,6 @@ public class CommonTermsQueryTest extends LuceneTestCase { public void testExtend() throws IOException { Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random()); - analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH)); RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer); String[] docs = new String[] {"this is the end of the world right", "is this it or maybe not", diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java index eaa71db5fe0..36e8b7e1b28 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java @@ -45,6 +45,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.List; @@ -63,7 +64,7 @@ public class TestParser extends LuceneTestCase { builder = new CorePlusExtensionsParser("contents", analyzer); BufferedReader d = new BufferedReader(new InputStreamReader( - TestParser.class.getResourceAsStream("reuters21578.txt"), "US-ASCII")); + TestParser.class.getResourceAsStream("reuters21578.txt"), StandardCharsets.US_ASCII)); dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); String line = d.readLine(); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java index 727d5e10eac..3d05367f071 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java @@ -33,9 +33,11 @@ import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import 
javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class TestNumericRangeFilterBuilder extends LuceneTestCase { @@ -203,7 +205,7 @@ public class TestNumericRangeFilterBuilder extends LuceneTestCase { private static Document getDocumentFromString(String str) throws SAXException, IOException, ParserConfigurationException { - InputStream is = new ByteArrayInputStream(str.getBytes("UTF-8")); + InputStream is = new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java index 4ffee9d222a..53a7f09a4b2 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeQueryBuilder.java @@ -27,9 +27,11 @@ import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class TestNumericRangeQueryBuilder extends LuceneTestCase { @@ -166,7 +168,7 @@ public class TestNumericRangeQueryBuilder extends LuceneTestCase { private static Document getDocumentFromString(String str) throws SAXException, IOException, ParserConfigurationException { - InputStream is = new ByteArrayInputStream(str.getBytes("UTF-8")); + InputStream is = new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery2.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery2.java index c93c14a057c..178d3dac89e 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery2.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery2.java @@ -20,6 +20,7 @@ package org.apache.lucene.sandbox.queries; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -83,7 +84,7 @@ public class TestSlowFuzzyQuery2 extends LuceneTestCase { System.out.println("TEST: codePointTable=" + codePointTable); } InputStream stream = getClass().getResourceAsStream("fuzzyTestData.txt"); - BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); int bits = Integer.parseInt(reader.readLine()); int terms = (int) Math.pow(2, bits); diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java 
b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java index 7ac787e5e1b..64c814786bc 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java @@ -23,6 +23,7 @@ import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -41,7 +42,7 @@ import java.util.List; */ public abstract class SpatialPrefixTree { - protected static final Charset UTF8 = Charset.forName("UTF-8"); + protected static final Charset UTF8 = StandardCharsets.UTF_8; protected final int maxLevels; diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestData.java b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestData.java index eeaea3081c9..fdf13cd7ec5 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestData.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestData.java @@ -24,6 +24,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Iterator; @@ -43,7 +44,7 @@ public class SpatialTestData { */ public static Iterator getTestData(InputStream in, SpatialContext ctx) throws IOException { List results = new ArrayList<>(); - BufferedReader bufInput = new BufferedReader(new InputStreamReader(in,"UTF-8")); + BufferedReader bufInput = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); try { String line; while ((line = bufInput.readLine()) != null) { diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestQuery.java b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestQuery.java index 4ccf469c022..a73baa9585a 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestQuery.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.spatial; */ import com.spatial4j.core.context.SpatialContext; + import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialArgsParser; @@ -25,6 +26,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -51,7 +53,7 @@ public class SpatialTestQuery { List results = new ArrayList<>(); - BufferedReader bufInput = new BufferedReader(new InputStreamReader(in,"UTF-8")); + BufferedReader bufInput = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); try { String line; for (int lineNumber = 1; (line = bufInput.readLine()) != null; lineNumber++) { diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java index 5e77021af79..91f9db6328c 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import 
java.io.Reader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.util.BytesRef; @@ -47,7 +48,7 @@ public class PlainTextDictionary implements Dictionary { * NOTE: content is treated as UTF-8 */ public PlainTextDictionary(File file) throws IOException { - in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8)); + in = new BufferedReader(IOUtils.getDecodingReader(file, StandardCharsets.UTF_8)); } /** @@ -56,7 +57,7 @@ public class PlainTextDictionary implements Dictionary { * NOTE: content is treated as UTF-8 */ public PlainTextDictionary(InputStream dictFile) { - in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); + in = new BufferedReader(IOUtils.getDecodingReader(dictFile, StandardCharsets.UTF_8)); } /** diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index 1d24c2c9d8c..5006cb4e8d9 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.Set; import org.apache.lucene.search.spell.Dictionary; @@ -102,7 +103,7 @@ public class FileDictionary implements Dictionary { * NOTE: content is treated as UTF-8 */ public FileDictionary(InputStream dictFile, String fieldDelimiter) { - in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); + in = new BufferedReader(IOUtils.getDecodingReader(dictFile, StandardCharsets.UTF_8)); this.fieldDelimiter = fieldDelimiter; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java index e0c875a3c1a..d340134d59b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java @@ -209,7 +209,7 @@ public class SortedInputIterator implements InputIterator { } } - /** encodes an entry (bytes+(payload)+(contexts)+weight) to the provided writer */ + /** encodes an entry (bytes+(contexts)+(payload)+weight) to the provided writer */ protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, Set contexts, long weight) throws IOException { int requiredLength = spare.length + 8 + ((hasPayloads) ? 
2 + payload.length : 0); if (hasContexts) { @@ -223,10 +223,6 @@ public class SortedInputIterator implements InputIterator { } output.reset(buffer); output.writeBytes(spare.bytes, spare.offset, spare.length); - if (hasPayloads) { - output.writeBytes(payload.bytes, payload.offset, payload.length); - output.writeShort((short) payload.length); - } if (hasContexts) { for (BytesRef ctx : contexts) { output.writeBytes(ctx.bytes, ctx.offset, ctx.length); @@ -234,6 +230,10 @@ public class SortedInputIterator implements InputIterator { } output.writeShort((short) contexts.size()); } + if (hasPayloads) { + output.writeBytes(payload.bytes, payload.offset, payload.length); + output.writeShort((short) payload.length); + } output.writeLong(weight); writer.write(buffer, 0, output.getPosition()); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java index 63a26e20245..458d59d5ba9 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java @@ -17,6 +17,7 @@ package org.apache.lucene.search.suggest.analyzing; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.Set; @@ -192,7 +193,7 @@ public final class FuzzySuggester extends AnalyzingSuggester { Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java index 2bffc11db35..d3e106cd19f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java @@ -33,7 +33,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Locale; import java.util.Vector; @@ -224,15 +224,13 @@ public class JaspellTernarySearchTrie { BufferedReader in; if (compression) in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream( - new FileInputStream(file)), IOUtils.CHARSET_UTF_8)); + new FileInputStream(file)), StandardCharsets.UTF_8)); else in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream( - file)), IOUtils.CHARSET_UTF_8)); + file)), StandardCharsets.UTF_8)); String word; int pos; Float occur, one = new Float(1); - int numWords = 0; while ((word = in.readLine()) != null) { - numWords++; pos = word.indexOf("\t"); occur = one; if (pos != -1) { diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java index 2d880f587a5..29f7387fca7 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java +++ 
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/FileDictionaryTest.java @@ -3,6 +3,7 @@ package org.apache.lucene.search.suggest; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.List; @@ -74,7 +75,7 @@ public class FileDictionaryTest extends LuceneTestCase { @Test public void testFileWithTerm() throws IOException { Map.Entry>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false); - InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); + InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes(StandardCharsets.UTF_8)); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); @@ -96,7 +97,7 @@ public class FileDictionaryTest extends LuceneTestCase { @Test public void testFileWithWeight() throws IOException { Map.Entry>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, false); - InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); + InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes(StandardCharsets.UTF_8)); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); @@ -118,7 +119,7 @@ public class FileDictionaryTest extends LuceneTestCase { @Test public void testFileWithWeightAndPayload() throws IOException { Map.Entry>,String> fileInput = generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, true); - InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); + InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes(StandardCharsets.UTF_8)); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); @@ -144,7 +145,7 @@ public class FileDictionaryTest extends LuceneTestCase { @Test public void testFileWithOneEntry() throws IOException { Map.Entry>,String> fileInput = generateFileInput(1, FileDictionary.DEFAULT_FIELD_DELIMITER, true, true); - InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); + InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes(StandardCharsets.UTF_8)); FileDictionary dictionary = new FileDictionary(inputReader); List> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); @@ -171,7 +172,7 @@ public class FileDictionaryTest extends LuceneTestCase { @Test public void testFileWithDifferentDelimiter() throws IOException { Map.Entry>,String> fileInput = generateFileInput(atLeast(100), " , ", true, true); - InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes("UTF-8")); + InputStream inputReader = new ByteArrayInputStream(fileInput.getValue().getBytes(StandardCharsets.UTF_8)); FileDictionary dictionary = new FileDictionary(inputReader, " , "); List> entries = fileInput.getKey(); InputIterator inputIter = dictionary.getEntryIterator(); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java 
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index 3bcedab6d50..1efebbefa0b 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -22,6 +22,7 @@ import java.io.InputStreamReader; import java.lang.reflect.Constructor; import java.net.URL; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -92,7 +93,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { LookupBenchmarkTest.benchmarkInput = input; } - static final Charset UTF_8 = Charset.forName("UTF-8"); + static final Charset UTF_8 = StandardCharsets.UTF_8; /** * Collect the multilingual input for benchmarks/ tests. diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java index 2b7cb67d186..3aff3c21d36 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java @@ -48,9 +48,11 @@ public class TestInputIterator extends LuceneTestCase { TreeMap> sorted = new TreeMap<>(comparator); TreeMap sortedWithoutPayload = new TreeMap<>(comparator); TreeMap>> sortedWithContext = new TreeMap<>(comparator); + TreeMap>>> sortedWithPayloadAndContext = new TreeMap<>(comparator); Input[] unsorted = new Input[num]; Input[] unsortedWithoutPayload = new Input[num]; Input[] unsortedWithContexts = new Input[num]; + Input[] unsortedWithPayloadAndContext = new Input[num]; Set ctxs; for (int i = 0; i < num; i++) { BytesRef key; @@ -67,9 +69,11 @@ public class TestInputIterator extends LuceneTestCase { sortedWithoutPayload.put(key, value); sorted.put(key, new SimpleEntry<>(value, payload)); sortedWithContext.put(key, new SimpleEntry<>(value, ctxs)); + sortedWithPayloadAndContext.put(key, new SimpleEntry<>(value, new SimpleEntry<>(payload, ctxs))); unsorted[i] = new Input(key, value, payload); unsortedWithoutPayload[i] = new Input(key, value); unsortedWithContexts[i] = new Input(key, value, ctxs); + unsortedWithPayloadAndContext[i] = new Input(key, value, payload, ctxs); } // test the sorted iterator wrapper with payloads @@ -96,6 +100,20 @@ public class TestInputIterator extends LuceneTestCase { } assertNull(wrapper.next()); + // test the sorted iterator wrapper with contexts and payload + wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithPayloadAndContext), comparator); + Iterator>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.entrySet().iterator(); + while (expectedPayloadContextEntries.hasNext()) { + Map.Entry>>> entry = expectedPayloadContextEntries.next(); + assertEquals(entry.getKey(), wrapper.next()); + assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); + Set actualCtxs = entry.getValue().getValue().getValue(); + assertEquals(actualCtxs, wrapper.contexts()); + BytesRef actualPayload = entry.getValue().getValue().getKey(); + assertEquals(actualPayload, wrapper.payload()); + } + assertNull(wrapper.next()); + // test the unsorted iterator wrapper with payloads wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted)); TreeMap> actual = new TreeMap<>(); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java 
b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 1de609a4d2d..19a4d2b825f 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -39,12 +39,9 @@ import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -// Test requires postings offsets: -@SuppressCodecs({"Lucene3x","MockFixedIntBlock","MockVariableIntBlock","MockSep","MockRandom"}) public class AnalyzingInfixSuggesterTest extends LuceneTestCase { public void testBasic() throws Exception { diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index 1b027948ce8..acade2e764e 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -17,6 +17,7 @@ package org.apache.lucene.search.suggest.fst; * limitations under the License. */ +import java.nio.charset.StandardCharsets; import java.util.*; import org.apache.lucene.search.suggest.Lookup.LookupResult; @@ -144,7 +145,7 @@ public class FSTCompletionTest extends LuceneTestCase { public void testThreeByte() throws Exception { String key = new String(new byte[] { - (byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2}, "UTF-8"); + (byte) 0xF0, (byte) 0xA4, (byte) 0xAD, (byte) 0xA2}, StandardCharsets.UTF_8); FSTCompletionBuilder builder = new FSTCompletionBuilder(); builder.add(new BytesRef(key), 0); diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/LargeInputFST.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/LargeInputFST.java index 0cb6c668d02..d55e8febb64 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/LargeInputFST.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/LargeInputFST.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OfflineSorter; @@ -43,7 +44,7 @@ public class LargeInputFST { BufferedReader reader = new BufferedReader( new InputStreamReader( - new FileInputStream(input), "UTF-8")); + new FileInputStream(input), StandardCharsets.UTF_8)); BytesRef scratch = new BytesRef(); String line; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index da0ec735601..48cd8298541 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -25,6 +25,7 @@ import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.CountDownLatch; @@ -562,14 
+563,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { if (random.nextBoolean()) { ft.setOmitNorms(true); } - String pf = TestUtil.getPostingsFormat("dummy"); - boolean supportsOffsets = !doesntSupportOffsets.contains(pf); switch(random.nextInt(4)) { case 0: ft.setIndexOptions(IndexOptions.DOCS_ONLY); break; case 1: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); break; case 2: ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); break; default: - if (supportsOffsets && offsetsAreCorrect) { + if (offsetsAreCorrect) { ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); } else { ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); @@ -892,7 +891,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { } protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException { - Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), "UTF-8"); + Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), StandardCharsets.UTF_8); final TokenStream ts = a.tokenStream("field", inputText); ts.reset(); new TokenStreamToDot(inputText, ts, new PrintWriter(w)).toDot(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java index b144f935040..09393ef6d69 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java @@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; /** @@ -68,7 +69,7 @@ final class MockPayloadFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8"))); + payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes(StandardCharsets.UTF_8))); int posIncr; if (pos == 0 || i % 2 == 1) { posIncr = 1; diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java index 659831856c3..cdcacc587bf 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.zip.ZipFile; import org.apache.lucene.analysis.Analyzer; @@ -34,9 +35,9 @@ public class VocabularyAssert { public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out) throws IOException { BufferedReader vocReader = new BufferedReader( - new InputStreamReader(voc, "UTF-8")); + new InputStreamReader(voc, StandardCharsets.UTF_8)); BufferedReader outputReader = new BufferedReader( - new InputStreamReader(out, "UTF-8")); + new InputStreamReader(out, StandardCharsets.UTF_8)); String inputWord = null; while ((inputWord = vocReader.readLine()) != null) { String expectedWord = outputReader.readLine(); @@ -49,7 +50,7 @@ public class VocabularyAssert { public static void assertVocabulary(Analyzer a, InputStream vocOut) throws IOException { BufferedReader vocReader = new BufferedReader( - new 
InputStreamReader(vocOut, "UTF-8")); + new InputStreamReader(vocOut, StandardCharsets.UTF_8)); String inputLine = null; while ((inputLine = vocReader.readLine()) != null) { if (inputLine.startsWith("#") || inputLine.trim().length() == 0) diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index 512f92596d2..42484f5c469 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -306,5 +306,10 @@ public class AssertingDocValuesFormat extends DocValuesFormat { public long ramBytesUsed() { return in.ramBytesUsed(); } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index 8681b59a505..d9bdc1c48dc 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -89,6 +89,11 @@ public final class AssertingPostingsFormat extends PostingsFormat { public long ramBytesUsed() { return in.ramBytesUsed(); } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } } static class AssertingFieldsConsumer extends FieldsConsumer { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java index a2763875662..c58689bda79 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java @@ -76,6 +76,11 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat { public long ramBytesUsed() { return in.ramBytesUsed(); } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } } enum Status { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java index d6503f4d039..7119e595a04 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java @@ -75,6 +75,11 @@ public class AssertingTermVectorsFormat extends TermVectorsFormat { public long ramBytesUsed() { return in.ramBytesUsed(); } + + @Override + public void checkIntegrity() throws IOException { + in.checkIntegrity(); + } } enum Status { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java index 78680f1dd33..999014a56c7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java @@ 
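Aside: the pattern added to each Asserting*Format above is identical: the asserting producer wraps a real producer and now forwards the new checkIntegrity() hook alongside ramBytesUsed(), so wrapping a codec in tests never silently skips checksum verification. A minimal sketch of that delegation shape ("Producer" here is a stand-in interface, not a real Lucene type):

    import java.io.IOException;

    // Illustrative only: the delegation shape used by the Asserting* test producers.
    interface Producer {
      long ramBytesUsed();
      void checkIntegrity() throws IOException;
    }

    class DelegatingProducer implements Producer {
      private final Producer in;

      DelegatingProducer(Producer in) { this.in = in; }

      @Override
      public long ramBytesUsed() { return in.ramBytesUsed(); }

      @Override
      public void checkIntegrity() throws IOException {
        in.checkIntegrity();  // forward, so the wrapped files still get verified
      }
    }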
-46,7 +46,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts.FormatAndBits; import org.apache.lucene.util.packed.PackedInts; -import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_CURRENT; +import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.VERSION_GCD_COMPRESSION; import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BLOCK_SIZE; import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.BYTES; import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.NUMBER; @@ -71,10 +71,11 @@ class Lucene42DocValuesConsumer extends DocValuesConsumer { try { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.createOutput(dataName, state.context); - CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT); + // this writer writes the format 4.2 did! + CodecUtil.writeHeader(data, dataCodec, VERSION_GCD_COMPRESSION); String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); meta = state.directory.createOutput(metaName, state.context); - CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT); + CodecUtil.writeHeader(meta, metaCodec, VERSION_GCD_COMPRESSION); success = true; } finally { if (!success) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java deleted file mode 100644 index b714489fb9f..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java +++ /dev/null @@ -1,202 +0,0 @@ -package org.apache.lucene.codecs.mockintblock; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.blockterms.BlockTermsReader; -import org.apache.lucene.codecs.blockterms.BlockTermsWriter; -import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader; -import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter; -import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase; -import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput; -import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput; -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.codecs.sep.IntIndexOutput; -import org.apache.lucene.codecs.sep.IntStreamFactory; -import org.apache.lucene.codecs.sep.SepPostingsReader; -import org.apache.lucene.codecs.sep.SepPostingsWriter; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.*; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** - * A silly test codec to verify core support for fixed - * sized int block encoders is working. The int encoder - * used here just writes each block as a series of vInt. - */ - -public final class MockFixedIntBlockPostingsFormat extends PostingsFormat { - - private final int blockSize; - - public MockFixedIntBlockPostingsFormat() { - this(1); - } - - public MockFixedIntBlockPostingsFormat(int blockSize) { - super("MockFixedIntBlock"); - this.blockSize = blockSize; - } - - @Override - public String toString() { - return getName() + "(blockSize=" + blockSize + ")"; - } - - // only for testing - public IntStreamFactory getIntFactory() { - return new MockIntFactory(blockSize); - } - - /** - * Encodes blocks as vInts of a fixed block size. - */ - public static class MockIntFactory extends IntStreamFactory { - private final int blockSize; - - public MockIntFactory(int blockSize) { - this.blockSize = blockSize; - } - - @Override - public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { - return new FixedIntBlockIndexInput(dir.openInput(fileName, context)) { - - @Override - protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) { - return new BlockReader() { - public void seek(long pos) {} - @Override - public void readBlock() throws IOException { - for(int i=0;i= count: "buffer.length=" + buffer.length + " count=" + count; - for(int i=0;i - - - - - - -Integer encoder implementations for testing. 
- - diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index f761565bcd6..33835ad336a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -18,8 +18,6 @@ package org.apache.lucene.codecs.mockrandom; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Random; import org.apache.lucene.codecs.BlockTreeTermsReader; @@ -44,22 +42,12 @@ import org.apache.lucene.codecs.memory.FSTOrdTermsReader; import org.apache.lucene.codecs.memory.FSTOrdTermsWriter; import org.apache.lucene.codecs.memory.FSTTermsReader; import org.apache.lucene.codecs.memory.FSTTermsWriter; -import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; -import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; -import org.apache.lucene.codecs.mocksep.MockSingleIntFactory; import org.apache.lucene.codecs.pulsing.PulsingPostingsReader; import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter; -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.codecs.sep.IntIndexOutput; -import org.apache.lucene.codecs.sep.IntStreamFactory; -import org.apache.lucene.codecs.sep.SepPostingsReader; -import org.apache.lucene.codecs.sep.SepPostingsWriter; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; @@ -93,48 +81,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat { } } - // Chooses random IntStreamFactory depending on file's extension - private static class MockIntStreamFactory extends IntStreamFactory { - private final int salt; - private final List delegates = new ArrayList<>(); - - public MockIntStreamFactory(Random random) { - salt = random.nextInt(); - delegates.add(new MockSingleIntFactory()); - final int blockSize = TestUtil.nextInt(random, 1, 2000); - delegates.add(new MockFixedIntBlockPostingsFormat.MockIntFactory(blockSize)); - final int baseBlockSize = TestUtil.nextInt(random, 1, 127); - delegates.add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize)); - // TODO: others - } - - private static String getExtension(String fileName) { - final int idx = fileName.indexOf('.'); - assert idx != -1; - return fileName.substring(idx); - } - - @Override - public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { - // Must only use extension, because IW.addIndexes can - // rename segment! 
- final IntStreamFactory f = delegates.get((Math.abs(salt ^ getExtension(fileName).hashCode())) % delegates.size()); - if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: read using int factory " + f + " from fileName=" + fileName); - } - return f.openInput(dir, fileName, context); - } - - @Override - public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException { - final IntStreamFactory f = delegates.get((Math.abs(salt ^ getExtension(fileName).hashCode())) % delegates.size()); - if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: write using int factory " + f + " to fileName=" + fileName); - } - return f.createOutput(dir, fileName, context); - } - } - @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { int minSkipInterval; @@ -171,16 +117,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { random.nextInt(); // consume a random for buffersize - PostingsWriterBase postingsWriter; - if (random.nextBoolean()) { - postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval); - } else { - if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: writing Standard postings"); - } - // TODO: randomize variables like acceptibleOverHead?! - postingsWriter = new Lucene41PostingsWriter(state, skipInterval); - } + PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state, skipInterval); if (random.nextBoolean()) { final int totTFCutoff = TestUtil.nextInt(random, 1, 20); @@ -327,20 +264,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize); } - PostingsReaderBase postingsReader; - - if (random.nextBoolean()) { - if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: reading Sep postings"); - } - postingsReader = new SepPostingsReader(state.directory, state.fieldInfos, state.segmentInfo, - state.context, new MockIntStreamFactory(random), state.segmentSuffix); - } else { - if (LuceneTestCase.VERBOSE) { - System.out.println("MockRandomCodec: reading Standard postings"); - } - postingsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); - } + PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); if (random.nextBoolean()) { final int totTFCutoff = TestUtil.nextInt(random, 1, 20); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java deleted file mode 100644 index 9fc3cd891d9..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java +++ /dev/null @@ -1,125 +0,0 @@ -package org.apache.lucene.codecs.mocksep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.blockterms.BlockTermsReader; -import org.apache.lucene.codecs.blockterms.BlockTermsWriter; -import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader; -import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter; -import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase; -import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; -import org.apache.lucene.codecs.sep.SepPostingsReader; -import org.apache.lucene.codecs.sep.SepPostingsWriter; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.BytesRef; - -/** - * A silly codec that simply writes each file separately as - * single vInts. Don't use this (performance will be poor)! - * This is here just to test the core sep codec - * classes. - */ -public final class MockSepPostingsFormat extends PostingsFormat { - - public MockSepPostingsFormat() { - super("MockSep"); - } - - @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - - PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockSingleIntFactory()); - - boolean success = false; - TermsIndexWriterBase indexWriter; - try { - indexWriter = new FixedGapTermsIndexWriter(state); - success = true; - } finally { - if (!success) { - postingsWriter.close(); - } - } - - success = false; - try { - FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter); - success = true; - return ret; - } finally { - if (!success) { - try { - postingsWriter.close(); - } finally { - indexWriter.close(); - } - } - } - } - - @Override - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - - PostingsReaderBase postingsReader = new SepPostingsReader(state.directory, state.fieldInfos, state.segmentInfo, - state.context, new MockSingleIntFactory(), state.segmentSuffix); - - TermsIndexReaderBase indexReader; - boolean success = false; - try { - indexReader = new FixedGapTermsIndexReader(state.directory, - state.fieldInfos, - state.segmentInfo.name, - BytesRef.getUTF8SortedAsUnicodeComparator(), - state.segmentSuffix, state.context); - success = true; - } finally { - if (!success) { - postingsReader.close(); - } - } - - success = false; - try { - FieldsProducer ret = new BlockTermsReader(indexReader, - state.directory, - state.fieldInfos, - state.segmentInfo, - postingsReader, - state.context, - state.segmentSuffix); - success = true; - return ret; - } finally { - if (!success) { - try { - postingsReader.close(); - } finally { - indexReader.close(); - } - } - } - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java deleted file 
mode 100644 index ca42debdca2..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.apache.lucene.codecs.mocksep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.codecs.sep.IntIndexOutput; -import org.apache.lucene.codecs.sep.IntStreamFactory; - -import java.io.IOException; - -/** - * Encodes ints directly as vInts with {@link MockSingleIntIndexOutput} - * @lucene.experimental - */ -public class MockSingleIntFactory extends IntStreamFactory { - @Override - public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException { - return new MockSingleIntIndexInput(dir, fileName, context); - } - @Override - public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException { - return new MockSingleIntIndexOutput(dir, fileName, context); - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java deleted file mode 100644 index 765aeabbbee..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.apache.lucene.codecs.mocksep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.sep.IntIndexInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; - -/** Reads IndexInputs written with {@link - * MockSingleIntIndexOutput}. 
NOTE: this class is just for - * demonstration purposes (it is a very slow way to read a - * block of ints). - * - * @lucene.experimental - */ -public class MockSingleIntIndexInput extends IntIndexInput { - private final IndexInput in; - - public MockSingleIntIndexInput(Directory dir, String fileName, IOContext context) - throws IOException { - in = dir.openInput(fileName, context); - CodecUtil.checkHeader(in, MockSingleIntIndexOutput.CODEC, - MockSingleIntIndexOutput.VERSION_START, - MockSingleIntIndexOutput.VERSION_START); - } - - @Override - public Reader reader() throws IOException { - return new Reader(in.clone()); - } - - @Override - public void close() throws IOException { - in.close(); - } - - /** - * Just reads a vInt directly from the file. - */ - public static class Reader extends IntIndexInput.Reader { - // clone: - private final IndexInput in; - - public Reader(IndexInput in) { - this.in = in; - } - - /** Reads next single int */ - @Override - public int next() throws IOException { - //System.out.println("msii.next() fp=" + in.getFilePointer() + " vs " + in.length()); - return in.readVInt(); - } - } - - class MockSingleIntIndexInputIndex extends IntIndexInput.Index { - private long fp; - - @Override - public void read(DataInput indexIn, boolean absolute) - throws IOException { - if (absolute) { - fp = indexIn.readVLong(); - } else { - fp += indexIn.readVLong(); - } - } - - @Override - public void copyFrom(IntIndexInput.Index other) { - fp = ((MockSingleIntIndexInputIndex) other).fp; - } - - @Override - public void seek(IntIndexInput.Reader other) throws IOException { - ((Reader) other).in.seek(fp); - } - - @Override - public String toString() { - return Long.toString(fp); - } - - @Override - public Index clone() { - MockSingleIntIndexInputIndex other = new MockSingleIntIndexInputIndex(); - other.fp = fp; - return other; - } - } - - @Override - public Index index() { - return new MockSingleIntIndexInputIndex(); - } -} - diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java deleted file mode 100644 index c8aa4172e7f..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java +++ /dev/null @@ -1,105 +0,0 @@ -package org.apache.lucene.codecs.mocksep; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.sep.IntIndexOutput; - -import java.io.IOException; - -/** Writes ints directly to the file (not in blocks) as - * vInt. - * - * @lucene.experimental -*/ -public class MockSingleIntIndexOutput extends IntIndexOutput { - private final IndexOutput out; - final static String CODEC = "SINGLE_INTS"; - final static int VERSION_START = 0; - final static int VERSION_CURRENT = VERSION_START; - - public MockSingleIntIndexOutput(Directory dir, String fileName, IOContext context) throws IOException { - out = dir.createOutput(fileName, context); - boolean success = false; - try { - CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT); - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(out); - } - } - } - - /** Write an int to the primary file */ - @Override - public void write(int v) throws IOException { - out.writeVInt(v); - } - - @Override - public Index index() { - return new MockSingleIntIndexOutputIndex(); - } - - @Override - public void close() throws IOException { - out.close(); - } - - @Override - public String toString() { - return "MockSingleIntIndexOutput fp=" + out.getFilePointer(); - } - - private class MockSingleIntIndexOutputIndex extends IntIndexOutput.Index { - long fp; - long lastFP; - @Override - public void mark() { - fp = out.getFilePointer(); - } - @Override - public void copyFrom(IntIndexOutput.Index other, boolean copyLast) { - fp = ((MockSingleIntIndexOutputIndex) other).fp; - if (copyLast) { - lastFP = ((MockSingleIntIndexOutputIndex) other).fp; - } - } - @Override - public void write(DataOutput indexOut, boolean absolute) - throws IOException { - if (absolute) { - indexOut.writeVLong(fp); - } else { - indexOut.writeVLong(fp - lastFP); - } - lastFP = fp; - } - - @Override - public String toString() { - return Long.toString(fp); - } - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/package.html deleted file mode 100644 index c699ac181e6..00000000000 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -Very simple implementations of {@link org.apache.lucene.codecs.sep} for testing. 
- - diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index a83b5b6a8d7..12f01665f6b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -94,6 +94,9 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { } return sizeInBytes; } + + @Override + public void checkIntegrity() throws IOException {} } static class RAMField extends Terms { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java index 5ef63464ffb..10935a662f3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java @@ -667,18 +667,10 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase { FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()]; for(int fieldUpto=0;fieldUpto= 0 && allowPayloads; newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name, @@ -1232,7 +1224,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase { fieldsProducer.close(); dir.close(); - TestUtil.rmDir(path); + TestUtil.rm(path); } public void testDocsOnly() throws Exception { @@ -1281,7 +1273,7 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase { fieldsProducer = null; dir.close(); - TestUtil.rmDir(path); + TestUtil.rm(path); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java index a84a253c7dc..04fea7bd2df 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java @@ -18,6 +18,7 @@ package org.apache.lucene.index; */ import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -351,7 +352,7 @@ public abstract class BaseStoredFieldsFormatTestCase extends LuceneTestCase { ft.freeze(); final String string = TestUtil.randomSimpleString(random(), 50); - final byte[] bytes = string.getBytes("UTF-8"); + final byte[] bytes = string.getBytes(StandardCharsets.UTF_8); final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong(); final int i = random().nextBoolean() ? 
random().nextInt(42) : random().nextInt(); final float f = random().nextFloat(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index 8da536dd63a..6494a650d04 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -46,10 +46,7 @@ import org.apache.lucene.codecs.memory.FSTPostingsFormat; import org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat; import org.apache.lucene.codecs.memory.MemoryDocValuesFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; -import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; -import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; -import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat; import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat; @@ -142,9 +139,6 @@ public class RandomCodec extends Lucene46Codec { //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing //with such "wrapper" classes? new TestBloomFilteredLucene41Postings(), - new MockSepPostingsFormat(), - new MockFixedIntBlockPostingsFormat(TestUtil.nextInt(random, 1, 2000)), - new MockVariableIntBlockPostingsFormat( TestUtil.nextInt(random, 1, 127)), new MockRandomPostingsFormat(random), new NestedPulsingPostingsFormat(), new Lucene41WithOrds(TestUtil.nextInt(random, 1, 1000)), diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java index 0989d9e4977..8745bb5ed58 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java @@ -165,6 +165,11 @@ public class MockIndexOutputWrapper extends IndexOutput { dir.maybeThrowDeterministicException(); } + @Override + public long getChecksum() throws IOException { + return delegate.getChecksum(); + } + @Override public String toString() { return "MockIndexOutputWrapper(" + delegate + ")"; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/FailureMarker.java b/lucene/test-framework/src/java/org/apache/lucene/util/FailureMarker.java new file mode 100644 index 00000000000..7487f87aaf7 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/util/FailureMarker.java @@ -0,0 +1,48 @@ +package org.apache.lucene.util; + +import java.util.concurrent.atomic.AtomicInteger; + +import org.junit.runner.notification.Failure; +import org.junit.runner.notification.RunListener; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link RunListener} that detects suite/ test failures. We need it because failures + * due to thread leaks happen outside of any rule contexts. + */ +public class FailureMarker extends RunListener { + static final AtomicInteger failures = new AtomicInteger(); + + @Override + public void testFailure(Failure failure) throws Exception { + failures.incrementAndGet(); + } + + public static boolean hadFailures() { + return failures.get() > 0; + } + + static int getFailures() { + return failures.get(); + } + + public static void resetFailures() { + failures.set(0); + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java index 33d9d1d6917..4cc95bb0f3d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java @@ -29,6 +29,7 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.GZIPInputStream; @@ -133,7 +134,7 @@ public class LineFileDocs implements Closeable { } while (b >= 0 && b != 13 && b != 10); } - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 439238e70ee..05d51c23224 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -126,7 +126,8 @@ import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAs JUnit4MethodProvider.class }) @Listeners({ - RunListenerPrintReproduceInfo.class + RunListenerPrintReproduceInfo.class, + FailureMarker.class }) @SeedDecorators({MixWithSuiteName.class}) // See LUCENE-3995 for rationale. @ThreadLeakScope(Scope.SUITE) @@ -314,14 +315,6 @@ public abstract class LuceneTestCase extends Assert { CORE_DIRECTORIES.add("RAMDirectory"); }; - protected static final Set doesntSupportOffsets = new HashSet<>(Arrays.asList( - "Lucene3x", - "MockFixedIntBlock", - "MockVariableIntBlock", - "MockSep", - "MockRandom" - )); - // ----------------------------------------------------------------- // Fields initialized in class or instance rules. // ----------------------------------------------------------------- @@ -353,8 +346,7 @@ public abstract class LuceneTestCase extends Assert { /** * Suite failure marker (any error in the test or suite scope). 
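Aside: FailureMarker is registered globally through the @Listeners annotation above; because it counts failures in a static AtomicInteger, other rules can observe failures that happen outside any rule context (such as thread-leak failures). A hedged sketch of how the counter could be queried when driving tests programmatically (the placeholder test class is hypothetical, not part of the patch):

    import org.junit.Test;
    import org.junit.runner.JUnitCore;
    import org.apache.lucene.util.FailureMarker;

    public class FailureMarkerSketch {
      // A trivial placeholder suite; any JUnit test class works here.
      public static class PlaceholderTest {
        @Test public void ok() {}
      }

      public static void main(String[] args) {
        FailureMarker.resetFailures();

        JUnitCore core = new JUnitCore();
        core.addListener(new FailureMarker());  // normally wired up via @Listeners on LuceneTestCase
        core.run(PlaceholderTest.class);

        System.out.println("had failures: " + FailureMarker.hadFailures());
      }
    }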
*/ - public final static TestRuleMarkFailure suiteFailureMarker = - new TestRuleMarkFailure(); + public static TestRuleMarkFailure suiteFailureMarker; /** * Ignore tests after hitting a designated number of initial failures. This @@ -419,7 +411,7 @@ public abstract class LuceneTestCase extends Assert { public static TestRule classRules = RuleChain .outerRule(new TestRuleIgnoreTestSuites()) .around(ignoreAfterMaxFailures) - .around(suiteFailureMarker) + .around(suiteFailureMarker = new TestRuleMarkFailure()) .around(new TestRuleAssertionsRequired()) .around(new StaticFieldsInvariantRule(STATIC_LEAK_THRESHOLD, true) { @Override @@ -800,6 +792,7 @@ public abstract class LuceneTestCase extends Assert { } c.setUseCompoundFile(r.nextBoolean()); c.setReaderPooling(r.nextBoolean()); + c.setCheckIntegrityAtMerge(r.nextBoolean()); return c; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreAfterMaxFailures.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreAfterMaxFailures.java index 66667142808..f3296f37825 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreAfterMaxFailures.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreAfterMaxFailures.java @@ -40,11 +40,6 @@ public final class TestRuleIgnoreAfterMaxFailures implements TestRule { * Maximum failures. Package scope for tests. */ int maxFailures; - - /** - * Current count of failures. Package scope for tests. - */ - int failuresSoFar; /** * @param maxFailures @@ -61,19 +56,13 @@ public final class TestRuleIgnoreAfterMaxFailures implements TestRule { return new Statement() { @Override public void evaluate() throws Throwable { + int failuresSoFar = FailureMarker.getFailures(); if (failuresSoFar >= maxFailures) { RandomizedTest.assumeTrue("Ignored, failures limit reached (" + failuresSoFar + " >= " + maxFailures + ").", false); } - try { - s.evaluate(); - } catch (Throwable t) { - if (!TestRuleMarkFailure.isAssumption(t)) { - failuresSoFar++; - } - throw t; - } + s.evaluate(); } }; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java index b9cc69a4e1a..6ef10cf99ac 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java @@ -103,42 +103,38 @@ public final class TestUtil { private static final int GET_TEMP_DIR_RETRY_THRESHOLD = 1000; /** - * Deletes a directory and everything underneath it. + * Deletes a file or a directory (and everything underneath it). 
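Aside: newIndexWriterConfig now flips setCheckIntegrityAtMerge at random, so test merges sometimes verify file checksums before segments are read. Outside the test framework the same switch can be set explicitly; a hedged sketch assuming the 4.8 API (analyzer and version choice are illustrative):

    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    public class CheckIntegrityAtMergeSketch {
      public static void main(String[] args) {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_48,
            new WhitespaceAnalyzer(Version.LUCENE_48));
        // Verify checksums of segment files before they are merged away.
        conf.setCheckIntegrityAtMerge(true);
        System.out.println(conf);
      }
    }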
*/ - public static void rmDir(File dir) throws IOException { - if (dir.exists()) { - if (dir.isFile() && !dir.delete()) { - throw new IOException("could not delete " + dir); + public static void rm(File location) throws IOException { + if (!location.exists()) { + return; + } + + if (location.isDirectory()) { + for (File f : location.listFiles()) { + rm(f); } - for (File f : dir.listFiles()) { - if (f.isDirectory()) { - rmDir(f); - } else { - if (!f.delete()) { - throw new IOException("could not delete " + f); - } - } - } - if (!dir.delete()) { - throw new IOException("could not delete " + dir); + } else { + if (!location.delete()) { + throw new IOException("Could not delete: " + location.getAbsolutePath()); } } + + assert !location.exists(); } /** - * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first + * Convenience method: Unzip zipName + ".zip" into destDir, cleaning up + * destDir first. */ public static void unzip(File zipName, File destDir) throws IOException { - - ZipFile zipFile = new ZipFile(zipName); - - Enumeration entries = zipFile.entries(); - - rmDir(destDir); - + rm(destDir); destDir.mkdir(); LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker)); + ZipFile zipFile = new ZipFile(zipName); + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); @@ -189,15 +185,15 @@ public final class TestUtil { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); checker.setCrossCheckTermVectors(crossCheckTermVectors); - checker.setInfoStream(new PrintStream(bos, false, "UTF-8"), false); + checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false); CheckIndex.Status indexStatus = checker.checkIndex(null); if (indexStatus == null || indexStatus.clean == false) { System.out.println("CheckIndex failed"); - System.out.println(bos.toString("UTF-8")); + System.out.println(bos.toString(IOUtils.UTF_8)); throw new RuntimeException("CheckIndex failed"); } else { if (LuceneTestCase.INFOSTREAM) { - System.out.println(bos.toString("UTF-8")); + System.out.println(bos.toString(IOUtils.UTF_8)); } return indexStatus; } @@ -213,8 +209,9 @@ public final class TestUtil { public static void checkReader(AtomicReader reader, boolean crossCheckTermVectors) throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - PrintStream infoStream = new PrintStream(bos, false, "UTF-8"); + PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8); + reader.checkIntegrity(); FieldNormStatus fieldNormStatus = CheckIndex.testFieldNorms(reader, infoStream); TermIndexStatus termIndexStatus = CheckIndex.testPostings(reader, infoStream); StoredFieldStatus storedFieldStatus = CheckIndex.testStoredFields(reader, infoStream); @@ -227,11 +224,11 @@ public final class TestUtil { termVectorStatus.error != null || docValuesStatus.error != null) { System.out.println("CheckReader failed"); - System.out.println(bos.toString("UTF-8")); + System.out.println(bos.toString(IOUtils.UTF_8)); throw new RuntimeException("CheckReader failed"); } else { if (LuceneTestCase.INFOSTREAM) { - System.out.println(bos.toString("UTF-8")); + System.out.println(bos.toString(IOUtils.UTF_8)); } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java b/lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java index 8ba2538eb21..169c5498201 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java @@ -145,4 +145,9 @@ public class ThrottledIndexOutput extends IndexOutput { public void copyBytes(DataInput input, long numBytes) throws IOException { delegate.copyBytes(input, numBytes); } + + @Override + public long getChecksum() throws IOException { + return delegate.getChecksum(); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java index bb72ab0dd64..3384e9ee7a4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java @@ -21,6 +21,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -328,7 +329,7 @@ public class FSTTester { } if (LuceneTestCase.VERBOSE && pairs.size() <= 20 && fst != null) { - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); Util.toDot(fst, w, false, false); w.close(); System.out.println("SAVED out.dot"); diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 59d0dd3f33f..f85f32d7550 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -13,10 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
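Aside: MockIndexOutputWrapper and ThrottledIndexOutput both wrap a real IndexOutput, so their new getChecksum() overrides simply forward to the delegate; without the override, the wrapper would hide the running checksum that the checksum-footer code relies on. The footer checksum is a CRC-32, and java.util.zip can illustrate the kind of running value getChecksum() reports (a stand-alone sketch, not Lucene's implementation):

    import java.nio.charset.StandardCharsets;
    import java.util.zip.CRC32;

    public class RunningChecksumSketch {
      public static void main(String[] args) {
        CRC32 crc = new CRC32();

        // Update the checksum as bytes are "written", the way a checksumming output would.
        byte[] chunk1 = "first chunk".getBytes(StandardCharsets.UTF_8);
        byte[] chunk2 = "second chunk".getBytes(StandardCharsets.UTF_8);
        crc.update(chunk1, 0, chunk1.length);
        crc.update(chunk2, 0, chunk2.length);

        // The running value is what a delegating getChecksum() surfaces to callers.
        System.out.printf("crc32 = %08x%n", crc.getValue());
      }
    }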
-org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat -org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat -org.apache.lucene.codecs.mocksep.MockSepPostingsFormat org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds diff --git a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java index 33402d03414..e7640ced9f2 100644 --- a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java +++ b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java @@ -38,9 +38,9 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Reader; -import java.io.UnsupportedEncodingException; import java.io.Writer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -58,6 +58,7 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; + import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -91,17 +92,19 @@ public class GetMavenDependenciesTask extends Task { private static final Properties allProperties = new Properties(); private static final Set modulesWithSeparateCompileAndTestPOMs = new HashSet<>(); - private static final Set optionalExternalDependencies = new HashSet<>(); + private static final Set globalOptionalExternalDependencies = new HashSet<>(); + private static final Map> perModuleOptionalExternalDependencies = new HashMap<>(); static { // Add modules here that have split compile and test POMs // - they need compile-scope deps to also be test-scope deps. modulesWithSeparateCompileAndTestPOMs.addAll (Arrays.asList("lucene-core", "lucene-codecs", "solr-core", "solr-solrj")); - // Add external dependencies here that should be optional (i.e., not invoke Maven's transitive dep mechanism). + // Add external dependencies here that should be optional for all modules + // (i.e., not invoke Maven's transitive dependency mechanism). // Format is "groupId:artifactId" - optionalExternalDependencies.addAll(Arrays.asList - ("org.slf4j:jcl-over-slf4j", "org.slf4j:jul-to-slf4j", "org.slf4j:slf4j-api", "org.slf4j:slf4j-log4j12")); + globalOptionalExternalDependencies.addAll(Arrays.asList + ("org.slf4j:jcl-over-slf4j", "org.slf4j:jul-to-slf4j", "org.slf4j:slf4j-log4j12")); } private final XPath xpath = XPathFactory.newInstance().newXPath(); @@ -151,7 +154,7 @@ public class GetMavenDependenciesTask extends Task { } public void setVerbose(boolean verbose) { - verboseLevel = (verbose ? Project.MSG_INFO : Project.MSG_VERBOSE); + verboseLevel = (verbose ? 
Project.MSG_VERBOSE : Project.MSG_INFO); } public void setCentralizedVersionsFile(File file) { @@ -200,12 +203,10 @@ public class GetMavenDependenciesTask extends Task { Writer writer = null; try { FileOutputStream outputStream = new FileOutputStream(mavenDependenciesFiltersFile); - writer = new OutputStreamWriter(outputStream, "ISO-8859-1"); + writer = new OutputStreamWriter(outputStream, StandardCharsets.ISO_8859_1); allProperties.store(writer, null); } catch (FileNotFoundException e) { throw new BuildException("Can't find file: '" + mavenDependenciesFiltersFile.getPath() + "'", e); - } catch (UnsupportedEncodingException e) { - throw new BuildException(e); } catch (IOException e) { throw new BuildException("Exception writing out '" + mavenDependenciesFiltersFile.getPath() + "'", e); } finally { @@ -242,7 +243,7 @@ public class GetMavenDependenciesTask extends Task { } catch (BuildException e) { throw e; } catch (Exception e) { - throw new BuildException("Exception reading file " + ivyXmlFile.getPath(), e); + throw new BuildException("Exception reading file " + ivyXmlFile.getPath() + ": " + e, e); } } addSharedExternalDependencies(); @@ -258,10 +259,10 @@ public class GetMavenDependenciesTask extends Task { // Delay adding shared compile-scope dependencies until after all have been processed, // so dependency sharing is limited to a depth of one. Map> sharedDependencies = new HashMap<>(); - for (String artifactId : interModuleExternalCompileScopeDependencies.keySet()) { + for (String module : interModuleExternalCompileScopeDependencies.keySet()) { TreeSet deps = new TreeSet<>(); - sharedDependencies.put(artifactId, deps); - Set moduleDependencies = interModuleExternalCompileScopeDependencies.get(artifactId); + sharedDependencies.put(module, deps); + Set moduleDependencies = interModuleExternalCompileScopeDependencies.get(module); if (null != moduleDependencies) { for (String otherArtifactId : moduleDependencies) { SortedSet otherExtDeps = allExternalDependencies.get(otherArtifactId); @@ -275,13 +276,13 @@ public class GetMavenDependenciesTask extends Task { } } } - for (String artifactId : interModuleExternalTestScopeDependencies.keySet()) { - SortedSet deps = sharedDependencies.get(artifactId); + for (String module : interModuleExternalTestScopeDependencies.keySet()) { + SortedSet deps = sharedDependencies.get(module); if (null == deps) { deps = new TreeSet<>(); - sharedDependencies.put(artifactId, deps); + sharedDependencies.put(module, deps); } - Set moduleDependencies = interModuleExternalTestScopeDependencies.get(artifactId); + Set moduleDependencies = interModuleExternalTestScopeDependencies.get(module); if (null != moduleDependencies) { for (String otherArtifactId : moduleDependencies) { int testScopePos = otherArtifactId.indexOf(":test"); @@ -295,8 +296,8 @@ public class GetMavenDependenciesTask extends Task { for (ExternalDependency otherDep : otherExtDeps) { if (otherDep.isTestDependency == isTestScope) { if ( ! deps.contains(otherDep) - && ( null == allExternalDependencies.get(artifactId) - || ! allExternalDependencies.get(artifactId).contains(otherDep))) { + && ( null == allExternalDependencies.get(module) + || ! allExternalDependencies.get(module).contains(otherDep))) { // Add test-scope clone only if it's not already a compile-scope dependency. 
ExternalDependency otherDepTestScope = new ExternalDependency (otherDep.groupId, otherDep.artifactId, otherDep.classifier, true, otherDep.isOptional); @@ -308,13 +309,21 @@ public class GetMavenDependenciesTask extends Task { } } } - for (String artifactId : sharedDependencies.keySet()) { - SortedSet deps = allExternalDependencies.get(artifactId); + for (String module : sharedDependencies.keySet()) { + SortedSet deps = allExternalDependencies.get(module); if (null == deps) { deps = new TreeSet<>(); - allExternalDependencies.put(artifactId, deps); + allExternalDependencies.put(module, deps); + } + for (ExternalDependency dep : sharedDependencies.get(module)) { + String dependencyCoordinate = dep.groupId + ":" + dep.artifactId; + if (globalOptionalExternalDependencies.contains(dependencyCoordinate) + || (perModuleOptionalExternalDependencies.containsKey(module) + && perModuleOptionalExternalDependencies.get(module).contains(dependencyCoordinate))) { + dep = new ExternalDependency(dep.groupId, dep.artifactId, dep.classifier, dep.isTestDependency, true); + } + deps.add(dep); } - deps.addAll(sharedDependencies.get(artifactId)); } } @@ -615,7 +624,7 @@ public class GetMavenDependenciesTask extends Task { // Pattern.compile("(lucene|solr)/build/(.*)/classes/java"); String artifact = matcher.group(2); artifact = artifact.replace('/', '-'); - artifact = artifact.replace("analysis-", "analyzers-"); + artifact = artifact.replace("(? deps = allExternalDependencies.get(module); if (null == deps) { deps = new TreeSet<>(); @@ -808,7 +819,7 @@ public class GetMavenDependenciesTask extends Task { throw new BuildException("Properties file does not exist: " + file.getPath()); } // Properties files are encoded as Latin-1 - final Reader reader = new InputStreamReader(stream, Charset.forName("ISO-8859-1")); + final Reader reader = new InputStreamReader(stream, StandardCharsets.ISO_8859_1); final Properties properties = new Properties(); try { properties.load(reader); diff --git a/lucene/tools/src/java/org/apache/lucene/validation/LibVersionsCheckTask.java b/lucene/tools/src/java/org/apache/lucene/validation/LibVersionsCheckTask.java index 7741fb6e971..9777df9a965 100644 --- a/lucene/tools/src/java/org/apache/lucene/validation/LibVersionsCheckTask.java +++ b/lucene/tools/src/java/org/apache/lucene/validation/LibVersionsCheckTask.java @@ -33,6 +33,7 @@ import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import javax.xml.parsers.ParserConfigurationException; + import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; @@ -44,6 +45,7 @@ import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Locale; @@ -188,7 +190,7 @@ public class LibVersionsCheckTask extends Task { + centralizedVersionsFile.getPath()); } // Properties files are encoded as Latin-1 - final Reader reader = new InputStreamReader(stream, Charset.forName("ISO-8859-1")); + final Reader reader = new InputStreamReader(stream, StandardCharsets.ISO_8859_1); final BufferedReader bufferedReader = new BufferedReader(reader); String line = null; diff --git a/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java b/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java index 7ee4a6a598e..c3a8d0ccefd 100644 --- 
a/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java +++ b/lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java @@ -22,14 +22,13 @@ import java.io.FileInputStream; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.IOException; - +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; - import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -302,7 +301,7 @@ outer: BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader - (new FileInputStream(f), "UTF-8")); + (new FileInputStream(f), StandardCharsets.UTF_8)); try { String checksum = reader.readLine(); if (null == checksum || 0 == checksum.length()) { diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 58b9b04e3a5..fb3abaa5864 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -146,9 +146,6 @@ New Features improve logging and force refresh cluster state every 15 seconds. (Timothy Potter via shalin) - * SOLR-5228: Don't require or be inside of -- or - that be inside of . (Erick Erickson) - * SOLR-5749: A new Overseer status collection API exposes overseer queue sizes, timing statistics, success and error counts and last N failures per operation. (shalin) @@ -159,6 +156,21 @@ New Features * SOLR-4478: Allow cores to use configuration from a configsets directory outside their instance directory. (Alan Woodward, Erick Erickson) +* SOLR-5466: A new List collections and cluster status API which clients can use + to read collection and shard information instead of reading data directly from ZooKeeper. + (Dave Seltzer, Varun Thacker, Vitaliy Zhovtyuk, Erick Erickson, shalin) + +* SOLR-5795: New DocExpirationUpdateProcessorFactory supports computing an expiration + date for documents from the "TTL" expression, as well as automatically deleting expired + documents on a periodic basis. (hossman) + +* SOLR-5829: Allow ExpandComponent to accept query and filter query parameters + (Joel Bernstein) + +* SOLR-5654: Create a synonym filter factory that is (re)configurable, and + capable of reporting its configuration, via REST API. + (Tim Potter via Steve Rowe) + Bug Fixes ---------------------- @@ -167,6 +179,20 @@ Bug Fixes * SOLR-5893: On restarting overseer designate , move itself to front of the queue (Noble Paul) +* SOLR-5915: Attempts to specify the parserImpl for + solr.PreAnalyzedField fieldtype failed. (Mike McCandless) + +* SOLR-5943: SolrCmdDistributor does not distribute the openSearcher parameter. + (ludovic Boutros via shalin) + +* SOLR-5951: Fixed SolrDispatchFilter to throw useful exception on startup if + SLF4j logging jars are missing. (Uwe Schindler, Hossman, Shawn Heisey) + +* SOLR-5950: Maven config: make the org.slf4j:slf4j-api dependency transitive + (i.e., not optional) in all modules in which it's a dependency, including + solrj, except for the WAR, where it will remain optional. + (Uwe Schindler, Steve Rowe) + Optimizations ---------------------- * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY @@ -243,6 +269,12 @@ Other Changes * SOLR-5914: Cleanup and fix Solr's test cleanup code. (Mark Miller, Uwe Schindler) +* SOLR-5934: LBHttpSolrServer exception handling improvement and small test + improvements. 
(Gregory Chanan via Mark Miller) + +* SOLR-5773: CollapsingQParserPlugin should make elevated documents the + group head. (David Boychuck, Joel Bernstein) + ================== 4.7.1 ================== Versions of Major Components @@ -344,6 +376,9 @@ Bug Fixes * SOLR-5906: Collection create API ignores property.instanceDir parameter. (Varun Thacker, shalin) +* SOLR-5920: Distributed sort on DateField, BoolField and BCD{Int,Long,Str}Field + returns string cast exception (Eric Bus, AJ Lemke, hossman, Steve Rowe) + Other Changes --------------------- diff --git a/solr/common-build.xml b/solr/common-build.xml index 001488b66fe..f2b168d6faf 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -445,6 +445,12 @@ + + + + + + diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index bed79e6ffb9..b798bc96496 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -43,7 +43,6 @@ import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -303,20 +302,11 @@ public class ICUCollationField extends FieldType { @Override public Object marshalSortValue(Object value) { - if (null == value) { - return null; - } - final BytesRef val = (BytesRef)value; - return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + return marshalBase64SortValue(value); } @Override public Object unmarshalSortValue(Object value) { - if (null == value) { - return null; - } - final String val = (String)value; - final byte[] bytes = Base64.base64ToByteArray(val); - return new BytesRef(bytes); + return unmarshalBase64SortValue(value); } } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java index 3ba608c4990..20b34eedd16 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java @@ -18,10 +18,12 @@ package org.apache.solr.handler.dataimport; import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.nio.charset.StandardCharsets; import java.sql.Blob; import java.sql.Clob; import java.sql.SQLException; @@ -106,7 +108,7 @@ public class FieldReaderDataSource extends DataSource { private Reader getReader(Blob blob) throws SQLException, UnsupportedEncodingException { if (encoding == null) { - return (new InputStreamReader(blob.getBinaryStream(), "UTF-8")); + return (new InputStreamReader(blob.getBinaryStream(), StandardCharsets.UTF_8)); } else { return (new InputStreamReader(blob.getBinaryStream(), encoding)); } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java 
b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java index 8001935e889..5f5136bcb3e 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java @@ -17,10 +17,12 @@ package org.apache.solr.handler.dataimport; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.Properties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; @@ -138,7 +140,7 @@ public class FileDataSource extends DataSource { protected Reader openStream(File file) throws FileNotFoundException, UnsupportedEncodingException { if (encoding == null) { - return new InputStreamReader(new FileInputStream(file), "UTF-8"); + return new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8); } else { return new InputStreamReader(new FileInputStream(file), encoding); } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java index 73e6fb1c6fb..7857ab90cf7 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java @@ -26,6 +26,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; @@ -192,7 +193,7 @@ public class SimplePropertiesWriter extends DIHProperties { filePath += File.separator; } filePath += filename; - propOutput = new OutputStreamWriter(new FileOutputStream(filePath), IOUtils.CHARSET_UTF_8); + propOutput = new OutputStreamWriter(new FileOutputStream(filePath), StandardCharsets.UTF_8); existingProps.store(propOutput, null); log.info("Wrote last indexed time to " + filename); } catch (Exception e) { @@ -214,7 +215,7 @@ public class SimplePropertiesWriter extends DIHProperties { } filePath += filename; propInput = new FileInputStream(filePath); - props.load(new InputStreamReader(propInput, IOUtils.CHARSET_UTF_8)); + props.load(new InputStreamReader(propInput, StandardCharsets.UTF_8)); log.info("Read " + filename); } catch (Exception e) { log.warn("Unable to read: " + filename); diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrWriter.java index a87745e1433..61cbaa3e20b 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrWriter.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrWriter.java @@ -28,6 +28,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.nio.charset.StandardCharsets; /** *
        Writes documents to SOLR.
        @@ -147,7 +148,7 @@ public class SolrWriter extends DIHWriterBase implements DIHWriter { } } - return new String(baos.toByteArray(), "UTF-8"); + return new String(baos.toByteArray(), StandardCharsets.UTF_8); } static String getDocCount() { diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/URLDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/URLDataSource.java index f462466a557..b7609ade6ea 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/URLDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/URLDataSource.java @@ -24,6 +24,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; import java.net.URLConnection; +import java.nio.charset.StandardCharsets; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -140,7 +141,7 @@ public class URLDataSource extends DataSource { public static final String BASE_URL = "baseUrl"; - public static final String UTF_8 = "UTF-8"; + public static final String UTF_8 = StandardCharsets.UTF_8.name(); public static final String CONNECTION_TIMEOUT_FIELD_NAME = "connectionTimeout"; diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/UrlEvaluator.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/UrlEvaluator.java index 2d8c1990bc4..78f95234e25 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/UrlEvaluator.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/UrlEvaluator.java @@ -4,6 +4,7 @@ import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVE import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.List; /* diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java index d839f277605..36a05cdd831 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java @@ -18,10 +18,10 @@ package org.apache.solr.handler.dataimport; import java.io.StringReader; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Properties; -import org.apache.lucene.util.IOUtils; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.slf4j.Logger; @@ -67,7 +67,7 @@ public class ZKPropertiesWriter extends SimplePropertiesWriter { StringWriter output = new StringWriter(); try { existing.store(output, null); - byte[] bytes = output.toString().getBytes(IOUtils.CHARSET_UTF_8); + byte[] bytes = output.toString().getBytes(StandardCharsets.UTF_8); if (!zkClient.exists(path, false)) { try { zkClient.makePath(path, false); @@ -90,7 +90,7 @@ public class ZKPropertiesWriter extends SimplePropertiesWriter { try { byte[] data = zkClient.getData(path, null, null, false); if (data != null) { - props.load(new StringReader(new String(data, "UTF-8"))); + props.load(new StringReader(new String(data, StandardCharsets.UTF_8))); } } catch (Exception e) { log.warn( diff 
--git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestBuiltInEvaluators.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestBuiltInEvaluators.java index 7b8d632ad54..12c2e7f4c22 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestBuiltInEvaluators.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestBuiltInEvaluators.java @@ -20,6 +20,7 @@ import org.junit.Before; import org.junit.Test; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.*; @@ -30,7 +31,7 @@ import java.util.*; * @since solr 1.3 */ public class TestBuiltInEvaluators extends AbstractDataImportHandlerTestCase { - private static final String ENCODING = "UTF-8"; + private static final String ENCODING = StandardCharsets.UTF_8.name(); VariableResolver resolver; diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java index f946af943ed..faa7dcfac29 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestDocBuilder2.java @@ -26,6 +26,8 @@ import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; +import java.nio.charset.StandardCharsets; + /** *
        * Test for DocBuilder using the test harness @@ -243,14 +245,14 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTestCase { Map params = createMap("baseDir", tmpdir.getAbsolutePath()); - createFile(tmpdir, "a.xml", "a.xml".getBytes("UTF-8"), true); - createFile(tmpdir, "b.xml", "b.xml".getBytes("UTF-8"), true); - createFile(tmpdir, "c.props", "c.props".getBytes("UTF-8"), true); + createFile(tmpdir, "a.xml", "a.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(tmpdir, "b.xml", "b.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(tmpdir, "c.props", "c.props".getBytes(StandardCharsets.UTF_8), true); runFullImport(dataConfigFileList, params); assertQ(req("*:*"), "//*[@numFound='3']"); // Add a new file after a full index is done - createFile(tmpdir, "t.xml", "t.xml".getBytes("UTF-8"), false); + createFile(tmpdir, "t.xml", "t.xml".getBytes(StandardCharsets.UTF_8), false); runFullImport(dataConfigFileList, params); // we should find only 1 because by default clean=true is passed // and this particular import should find only one file t.xml diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java index 9ab861d7657..1fac521c9d4 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java @@ -23,6 +23,7 @@ import org.junit.Test; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.*; @@ -41,9 +42,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa public void testSimple() throws IOException { File tmpdir = createTempDir(); - createFile(tmpdir, "a.xml", "a.xml".getBytes("UTF-8"), false); - createFile(tmpdir, "b.xml", "b.xml".getBytes("UTF-8"), false); - createFile(tmpdir, "c.props", "c.props".getBytes("UTF-8"), false); + createFile(tmpdir, "a.xml", "a.xml".getBytes(StandardCharsets.UTF_8), false); + createFile(tmpdir, "b.xml", "b.xml".getBytes(StandardCharsets.UTF_8), false); + createFile(tmpdir, "c.props", "c.props".getBytes(StandardCharsets.UTF_8), false); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "xml$", FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath()); @@ -69,19 +70,19 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa long minLength = Long.MAX_VALUE; String smallestFile = ""; - byte[] content = "abcdefgij".getBytes("UTF-8"); + byte[] content = "abcdefgij".getBytes(StandardCharsets.UTF_8); createFile(tmpdir, "a.xml", content, false); if (minLength > content.length) { minLength = content.length; smallestFile = "a.xml"; } - content = "abcdefgij".getBytes("UTF-8"); + content = "abcdefgij".getBytes(StandardCharsets.UTF_8); createFile(tmpdir, "b.xml", content, false); if (minLength > content.length) { minLength = content.length; smallestFile = "b.xml"; } - content = "abc".getBytes("UTF-8"); + content = "abc".getBytes(StandardCharsets.UTF_8); createFile(tmpdir, "c.props", content, false); if (minLength > content.length) { minLength = content.length; @@ -135,9 +136,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa public void testNTOT() throws IOException { File tmpdir = createTempDir(); - 
createFile(tmpdir, "a.xml", "a.xml".getBytes("UTF-8"), true); - createFile(tmpdir, "b.xml", "b.xml".getBytes("UTF-8"), true); - createFile(tmpdir, "c.props", "c.props".getBytes("UTF-8"), true); + createFile(tmpdir, "a.xml", "a.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(tmpdir, "b.xml", "b.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(tmpdir, "c.props", "c.props".getBytes(StandardCharsets.UTF_8), true); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "xml$", FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), @@ -159,7 +160,7 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa VariableResolver resolver = new VariableResolver(); String lastMod = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date(System.currentTimeMillis() - 50000)); resolver.addNamespace("a", createMap("x", lastMod)); - createFile(tmpdir, "t.xml", "t.xml".getBytes("UTF-8"), false); + createFile(tmpdir, "t.xml", "t.xml".getBytes(StandardCharsets.UTF_8), false); fList = getFiles(resolver, attrs); assertEquals(1, fList.size()); assertEquals("File name must be t.xml", new File(tmpdir, "t.xml").getAbsolutePath(), fList.get(0)); @@ -170,9 +171,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa File tmpdir = createTempDir(); File childdir = new File(tmpdir + "/child" ); childdir.mkdir(); - createFile(childdir, "a.xml", "a.xml".getBytes("UTF-8"), true); - createFile(childdir, "b.xml", "b.xml".getBytes("UTF-8"), true); - createFile(childdir, "c.props", "c.props".getBytes("UTF-8"), true); + createFile(childdir, "a.xml", "a.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(childdir, "b.xml", "b.xml".getBytes(StandardCharsets.UTF_8), true); + createFile(childdir, "c.props", "c.props".getBytes(StandardCharsets.UTF_8), true); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "^.*\\.xml$", FileListEntityProcessor.BASE_DIR, childdir.getAbsolutePath(), diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java index c07a6cf29a3..5f0c58d0aa8 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java @@ -1,6 +1,7 @@ package org.apache.solr.handler.dataimport; import java.io.File; +import java.nio.charset.StandardCharsets; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -32,9 +33,9 @@ public class TestFileListWithLineEntityProcessor extends AbstractDataImportHandl public void test() throws Exception { File tmpdir = TestUtil.createTempDir(LuceneTestCase.getTestClass().getSimpleName()); - createFile(tmpdir, "a.txt", "a line one\na line two\na line three".getBytes("UTF-8"), false); - createFile(tmpdir, "b.txt", "b line one\nb line two".getBytes("UTF-8"), false); - createFile(tmpdir, "c.txt", "c line one\nc line two\nc line three\nc line four".getBytes("UTF-8"), false); + createFile(tmpdir, "a.txt", "a line one\na line two\na line three".getBytes(StandardCharsets.UTF_8), false); + createFile(tmpdir, "b.txt", "b line one\nb line two".getBytes(StandardCharsets.UTF_8), false); + createFile(tmpdir, "c.txt", "c line one\nc line two\nc line three\nc line 
four".getBytes(StandardCharsets.UTF_8), false); String config = generateConfig(tmpdir); LocalSolrQueryRequest request = lrf.makeRequest( diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java index c370223fdb8..778d1913847 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestXPathEntityProcessor.java @@ -19,6 +19,7 @@ package org.apache.solr.handler.dataimport; import java.io.File; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -44,7 +45,7 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase public void withFieldsAndXpath() throws Exception { File tmpdir = createTempDir(); - createFile(tmpdir, "x.xsl", xsl.getBytes("UTF-8"), false); + createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), false); Map entityAttrs = createMap("name", "e", "url", "cd.xml", XPathEntityProcessor.FOR_EACH, "/catalog/cd"); List fields = new ArrayList(); @@ -332,7 +333,7 @@ public class TestXPathEntityProcessor extends AbstractDataImportHandlerTestCase @Test public void withDefaultSolrAndXsl() throws Exception { File tmpdir = createTempDir(); - AbstractDataImportHandlerTestCase.createFile(tmpdir, "x.xsl", xsl.getBytes("UTF-8"), + AbstractDataImportHandlerTestCase.createFile(tmpdir, "x.xsl", xsl.getBytes(StandardCharsets.UTF_8), false); Map entityAttrs = createMap("name", "e", diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java index 35b8f3c8c6e..8e30d1a880e 100644 --- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java +++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java @@ -20,6 +20,7 @@ package org.apache.solr.handler.extraction; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import java.util.Map.Entry; import java.util.regex.Pattern; @@ -73,7 +74,7 @@ public class RegexRulesPasswordProvider implements PasswordProvider { */ public static LinkedHashMap parseRulesFile(InputStream is) { LinkedHashMap rules = new LinkedHashMap<>(); - BufferedReader br = new BufferedReader(IOUtils.getDecodingReader(is, IOUtils.CHARSET_UTF_8)); + BufferedReader br = new BufferedReader(IOUtils.getDecodingReader(is, StandardCharsets.UTF_8)); String line; try { int linenum = 0; diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java index 9a50840f578..3b83ea79306 100644 --- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.InputStream; import 
java.io.InputStreamReader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -126,10 +127,9 @@ public class LangDetectLanguageIdentifierUpdateProcessorFactory extends } loaded = true; List profileData = new ArrayList<>(); - Charset encoding = Charset.forName("UTF-8"); for (String language : languages) { InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream("langdetect-profiles/" + language); - BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding)); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)); profileData.add(new String(IOUtils.toCharArray(reader))); reader.close(); } diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java index 87e6e39074f..55066ffb12e 100644 --- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java @@ -31,6 +31,7 @@ import java.io.Writer; import java.net.URISyntaxException; import java.net.URL; import java.net.URLClassLoader; +import java.nio.charset.StandardCharsets; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Arrays; @@ -82,7 +83,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kitesdk.morphline.base.Fields; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.io.ByteStreams; @@ -924,7 +924,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { FileSystem fs = fullInputList.getFileSystem(conf); FSDataOutputStream out = fs.create(fullInputList); try { - Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); for (Path inputFile : inputFiles) { FileSystem inputFileFs = inputFile.getFileSystem(conf); @@ -949,7 +949,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { in = inputList.getFileSystem(conf).open(inputList); } try { - BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); String line; while ((line = reader.readLine()) != null) { writer.write(line + "\n"); @@ -988,7 +988,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { private void randomizeFewInputFiles(FileSystem fs, Path outputStep2Dir, Path fullInputList) throws IOException { List lines = new ArrayList(); - BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), StandardCharsets.UTF_8)); try { String line; while ((line = reader.readLine()) != null) { @@ -1001,7 +1001,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { Collections.shuffle(lines, new Random(421439783L)); // constant seed for reproducability FSDataOutputStream out = fs.create(new Path(outputStep2Dir, FULL_INPUT_LIST)); - Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); try { for (String line : lines) { writer.write(line + "\n"); @@ 
-1135,7 +1135,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { * turnaround during trial & debug sessions */ private void dryRun(MorphlineMapRunner runner, FileSystem fs, Path fullInputList) throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(fullInputList), StandardCharsets.UTF_8)); try { String line; while ((line = reader.readLine()) != null) { @@ -1154,7 +1154,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { int numFiles = 0; FSDataOutputStream out = fs.create(fullInputList); try { - Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); for (FileStatus stat : dirs) { LOG.debug("Adding path {}", stat.getPath()); Path dir = new Path(stat.getPath(), "data/index"); @@ -1263,7 +1263,7 @@ public class MapReduceIndexerTool extends Configured implements Tool { byte[] bytes = ByteStreams.toByteArray(in); in.close(); Preconditions.checkArgument(bytes.length > 0); - int solrShard = Integer.parseInt(new String(bytes, Charsets.UTF_8)); + int solrShard = Integer.parseInt(new String(bytes, StandardCharsets.UTF_8)); if (!delete(solrShardNumberFile, false, fs)) { return false; } diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java index 3de00b8d445..9f0498dc455 100644 --- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/SolrOutputFormat.java @@ -22,6 +22,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; import java.util.Locale; @@ -242,7 +243,7 @@ public class SolrOutputFormat extends FileOutputFormat { ZipEntry ze = new ZipEntry("solr.xml"); zos.putNextEntry(ze); - zos.write("".getBytes("UTF-8")); + zos.write("".getBytes(StandardCharsets.UTF_8)); zos.flush(); zos.closeEntry(); zos.close(); diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java index d2efa96cdcf..7570493d49d 100644 --- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/ToolRunnerHelpFormatter.java @@ -24,6 +24,7 @@ import java.io.PrintWriter; import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import net.sourceforge.argparse4j.ArgumentParsers; import net.sourceforge.argparse4j.helper.ASCIITextWidthCounter; @@ -42,7 +43,7 @@ class ToolRunnerHelpFormatter { String msg; try { ToolRunner.printGenericCommandUsage(new PrintStream(bout, true, "UTF-8")); - msg = new String(bout.toByteArray(), "UTF-8"); + msg = new String(bout.toByteArray(), StandardCharsets.UTF_8); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); // unreachable } diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java index 
566068f0c40..6f997329b17 100644 --- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/TreeMergeOutputFormat.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; @@ -43,7 +44,6 @@ import org.apache.solr.store.hdfs.HdfsDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; /** @@ -188,7 +188,7 @@ public class TreeMergeOutputFormat extends FileOutputFormat LOG.debug("Merging into outputShardNum: " + outputShardNum + " from taskId: " + taskId); Path shardNumberFile = new Path(workDir.getParent().getParent(), TreeMergeMapper.SOLR_SHARD_NUMBER); OutputStream out = shardNumberFile.getFileSystem(context.getConfiguration()).create(shardNumberFile); - Writer writer = new OutputStreamWriter(out, Charsets.UTF_8); + Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8); writer.write(String.valueOf(outputShardNum)); writer.flush(); writer.close(); diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java index 1ad141a4264..8a5eaaf6d9e 100644 --- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java +++ b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/UnbufferedDataInputInputStream.java @@ -22,6 +22,7 @@ import java.io.DataInput; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; public class UnbufferedDataInputInputStream extends org.apache.solr.common.util.DataInputInputStream { private final DataInputStream in; @@ -97,7 +98,7 @@ public class UnbufferedDataInputInputStream extends org.apache.solr.common.util. 
@Override public String readLine() throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)); return reader.readLine(); } diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java index d00b904a9d4..e7b21178daf 100644 --- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java @@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.PrintStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; @@ -191,7 +192,7 @@ public class MapReduceIndexerToolArgumentParserTest extends SolrTestCaseJ4 { public void testArgsParserHelp() throws UnsupportedEncodingException { String[] args = new String[] { "--help" }; assertEquals(new Integer(0), parser.parseArgs(args, conf, opts)); - String helpText = new String(bout.toByteArray(), "UTF-8"); + String helpText = new String(bout.toByteArray(), StandardCharsets.UTF_8); assertTrue(helpText.contains("MapReduce batch job driver that ")); assertTrue(helpText.contains("bin/hadoop command")); assertEquals(0, berr.toByteArray().length); @@ -458,9 +459,9 @@ public class MapReduceIndexerToolArgumentParserTest extends SolrTestCaseJ4 { private void assertArgumentParserException(String[] args) throws UnsupportedEncodingException { assertEquals("should have returned fail code", new Integer(1), parser.parseArgs(args, conf, opts)); - assertEquals("no sys out expected:" + new String(bout.toByteArray(), "UTF-8"), 0, bout.toByteArray().length); + assertEquals("no sys out expected:" + new String(bout.toByteArray(), StandardCharsets.UTF_8), 0, bout.toByteArray().length); String usageText; - usageText = new String(berr.toByteArray(), "UTF-8"); + usageText = new String(berr.toByteArray(), StandardCharsets.UTF_8); assertTrue("should start with usage msg \"usage: hadoop \":" + usageText, usageText.startsWith("usage: hadoop ")); } diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java index 870616f0790..a076851353e 100644 --- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java @@ -22,6 +22,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.lang.reflect.Array; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import org.apache.commons.io.FileUtils; @@ -308,7 +309,7 @@ public class MorphlineBasicMiniMRTest extends SolrTestCaseJ4 { assertTrue(fs.mkdirs(inDir)); Path INPATH = new Path(inDir, "input.txt"); OutputStream os = fs.create(INPATH); - Writer wr = new OutputStreamWriter(os, "UTF-8"); + Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8); wr.write(DATADIR + "/" + inputAvroFile); wr.close(); diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java 
index c6dcce977bb..8f47c9413a3 100644 --- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java +++ b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java @@ -24,6 +24,7 @@ import java.io.UnsupportedEncodingException; import java.io.Writer; import java.lang.reflect.Array; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -679,7 +680,7 @@ public class MorphlineGoLiveMiniMRTest extends AbstractFullDistribZkTestBase { Path dataDir, String localFile) throws IOException, UnsupportedEncodingException { Path INPATH = new Path(inDir, "input.txt"); OutputStream os = fs.create(INPATH); - Writer wr = new OutputStreamWriter(os, "UTF-8"); + Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8); wr.write(DATADIR + File.separator + localFile); wr.close(); diff --git a/solr/contrib/velocity/src/java/org/apache/solr/response/SolrParamResourceLoader.java b/solr/contrib/velocity/src/java/org/apache/solr/response/SolrParamResourceLoader.java index 54d86ddc1d3..8f033b585ed 100644 --- a/solr/contrib/velocity/src/java/org/apache/solr/response/SolrParamResourceLoader.java +++ b/solr/contrib/velocity/src/java/org/apache/solr/response/SolrParamResourceLoader.java @@ -25,7 +25,7 @@ import org.apache.commons.collections.ExtendedProperties; import java.io.ByteArrayInputStream; import java.io.InputStream; -import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -57,11 +57,7 @@ public class SolrParamResourceLoader extends ResourceLoader { @Override public InputStream getResourceStream(String s) throws ResourceNotFoundException { String template = templates.get(s); - try { - return template == null ? null : new ByteArrayInputStream(template.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); // may not happen - } + return template == null ? 
null : new ByteArrayInputStream(template.getBytes(StandardCharsets.UTF_8)); } @Override diff --git a/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java b/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java index 413d708f079..3318713045a 100644 --- a/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java +++ b/solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java @@ -17,21 +17,29 @@ package org.apache.solr.response; -import org.apache.lucene.util.IOUtils; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.StringWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.Properties; + import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.SolrResponseBase; -import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.velocity.Template; import org.apache.velocity.VelocityContext; import org.apache.velocity.app.VelocityEngine; import org.apache.velocity.runtime.RuntimeConstants; -import org.apache.velocity.tools.generic.*; - -import java.io.*; -import java.util.Properties; +import org.apache.velocity.tools.generic.ComparisonDateTool; +import org.apache.velocity.tools.generic.EscapeTool; +import org.apache.velocity.tools.generic.ListTool; +import org.apache.velocity.tools.generic.MathTool; +import org.apache.velocity.tools.generic.NumberTool; +import org.apache.velocity.tools.generic.SortTool; public class VelocityResponseWriter implements QueryResponseWriter { @@ -132,7 +140,7 @@ public class VelocityResponseWriter implements QueryResponseWriter { try { is = resourceLoader.getResourceStream(propFile); Properties props = new Properties(); - props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8)); + props.load(new InputStreamReader(is, StandardCharsets.UTF_8)); engine.init(props); } finally { diff --git a/solr/core/build.xml b/solr/core/build.xml index 90c4bc4a872..3d8d11f848d 100644 --- a/solr/core/build.xml +++ b/solr/core/build.xml @@ -32,9 +32,14 @@ + + + + + diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java index 1e3a2db1719..304c0a2b5a0 100644 --- a/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/BasicAccumulator.java @@ -18,8 +18,10 @@ package org.apache.solr.analytics.accumulator; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.Date; +import java.util.List; import java.util.Set; import org.apache.lucene.index.AtomicReaderContext; @@ -35,6 +37,8 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.TrieDateField; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Supplier; @@ -42,6 +46,7 @@ import com.google.common.base.Supplier; * A BasicAccumulator manages the ValueCounters and Expressions without regard to Facets. 
*/ public class BasicAccumulator extends ValueAccumulator { + private static final Logger log = LoggerFactory.getLogger(BasicAccumulator.class); protected final SolrIndexSearcher searcher; protected final AnalyticsRequest request; protected final DocSet docs; @@ -57,14 +62,16 @@ public class BasicAccumulator extends ValueAccumulator { this.searcher = searcher; this.docs = docs; this.request = request; - statsCollectorArraySupplier = StatsCollectorSupplierFactory.create(searcher.getSchema(), request); + final List exRequests = new ArrayList(request.getExpressions()); // make a copy here + Collections.sort(exRequests); + log.info("Processing request '"+request.getName()+"'"); + statsCollectorArraySupplier = StatsCollectorSupplierFactory.create(searcher.getSchema(), exRequests); statsCollectors = statsCollectorArraySupplier.get(); - int size = request.getExpressions().size(); + int size = exRequests.size(); expressionNames = new String[size]; expressionStrings = new String[size]; int count = 0; - Collections.sort(request.getExpressions()); - for (ExpressionRequest expRequest : request.getExpressions()) { + for (ExpressionRequest expRequest : exRequests) { expressionNames[count] = expRequest.getName(); expressionStrings[count++] = expRequest.getExpressionString(); } diff --git a/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java b/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java index c23e63364c9..61ed6e100b9 100644 --- a/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java +++ b/solr/core/src/java/org/apache/solr/analytics/accumulator/FacetingAccumulator.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Filter; @@ -98,7 +99,7 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA List rangeFreqs = request.getRangeFacets(); List queryFreqs = request.getQueryFacets(); - this.fieldFacetExpressions = new LinkedHashMap<>(fieldFreqs.size()); + this.fieldFacetExpressions = new TreeMap<>(); this.rangeFacetExpressions = new LinkedHashMap<>(rangeFreqs.size()); this.queryFacetExpressions = new LinkedHashMap<>(queryFreqs.size()); this.fieldFacetCollectors = new LinkedHashMap<>(fieldFreqs.size()); @@ -120,8 +121,8 @@ public class FacetingAccumulator extends BasicAccumulator implements FacetValueA final SchemaField ff = fr.getField(); final FieldFacetAccumulator facc = FieldFacetAccumulator.create(searcher, this, ff); facetAccumulators.add(facc); - fieldFacetExpressions.put(freq.getName(), new LinkedHashMap() ); - fieldFacetCollectors.put(freq.getName(), new LinkedHashMap()); + fieldFacetExpressions.put(freq.getName(), new TreeMap() ); + fieldFacetCollectors.put(freq.getName(), new TreeMap()); } /** * For each range and query facet request add a bucket to the corresponding diff --git a/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java b/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java index 3e56c89c665..1455cbcf320 100644 --- a/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java +++ b/solr/core/src/java/org/apache/solr/analytics/expression/BaseExpression.java @@ -35,7 +35,10 @@ public class BaseExpression extends Expression { } public Comparable getValue() { - return statsCollector.getStat(stat); + 
if(statsCollector.getStatsList().contains(stat)) { + return statsCollector.getStat(stat); + } + return null; } } /** diff --git a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java index e019569fb0f..c1ec21fb15b 100644 --- a/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java +++ b/solr/core/src/java/org/apache/solr/analytics/request/AnalyticsStats.java @@ -33,6 +33,8 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class which computes the set of {@link AnalyticsRequest}s. @@ -43,6 +45,7 @@ public class AnalyticsStats { protected SolrIndexSearcher searcher; protected SolrQueryRequest req; protected AnalyticsStatisticsCollector statsCollector; + private static final Logger log = LoggerFactory.getLogger(AnalyticsStats.class); public AnalyticsStats(SolrQueryRequest req, DocSet docs, SolrParams params, AnalyticsStatisticsCollector statsCollector) { this.req = req; @@ -69,6 +72,10 @@ public class AnalyticsStats { return res; } statsCollector.addRequests(requests.size()); + + // Get filter to all docs + Filter filter = docs.getTopFilter(); + // Computing each Analytics Request Seperately for( AnalyticsRequest areq : requests ){ // The Accumulator which will control the statistics generation @@ -84,7 +91,7 @@ public class AnalyticsStats { accumulator = FacetingAccumulator.create(searcher, docs, areq, req); } } catch (IOException e) { - System.err.println(e.getMessage()); + log.warn("Analytics request '"+areq.getName()+"' failed", e); continue; } @@ -96,7 +103,6 @@ public class AnalyticsStats { statsCollector.addQueries(((BasicAccumulator)accumulator).getNumQueries()); // Loop through the documents returned by the query and add to accumulator - Filter filter = docs.getTopFilter(); List contexts = searcher.getTopReaderContext().leaves(); for (int leafNum = 0; leafNum < contexts.size(); leafNum++) { AtomicReaderContext context = contexts.get(leafNum); diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java b/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java index 08608861789..45cec2bc08a 100644 --- a/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java +++ b/solr/core/src/java/org/apache/solr/analytics/statistics/MinMaxStatsCollector.java @@ -74,7 +74,7 @@ public class MinMaxStatsCollector implements StatsCollector{ if (stat.equals("min")&&min!=null) { return (Comparable)min.toObject(); } - if (stat.equals("max")&&min!=null) { + if (stat.equals("max")&&max!=null) { return (Comparable)max.toObject(); } if (stat.equals("count")) { @@ -83,7 +83,9 @@ public class MinMaxStatsCollector implements StatsCollector{ if (stat.equals("missing")) { return new Long(missingCount); } + return null; +// throw new IllegalArgumentException("No stat named '"+stat+"' in this collector " + this); } public Set getStatsList() { diff --git a/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java index 7b2d14b74af..eac86643964 100644 --- a/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java +++ 
b/solr/core/src/java/org/apache/solr/analytics/statistics/StatsCollectorSupplierFactory.java @@ -21,9 +21,11 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; @@ -33,7 +35,6 @@ import org.apache.lucene.queries.function.valuesource.IntFieldSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.search.FieldCache; import org.apache.solr.analytics.expression.ExpressionFactory; -import org.apache.solr.analytics.request.AnalyticsRequest; import org.apache.solr.analytics.request.ExpressionRequest; import org.apache.solr.analytics.util.AnalyticsParams; import org.apache.solr.analytics.util.AnalyticsParsers; @@ -67,10 +68,13 @@ import org.apache.solr.schema.TrieDoubleField; import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieLongField; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Supplier; public class StatsCollectorSupplierFactory { + private static final Logger log = LoggerFactory.getLogger(StatsCollectorSupplierFactory.class); // FunctionTypes final static int NUMBER_TYPE = 0; @@ -83,18 +87,18 @@ public class StatsCollectorSupplierFactory { * Builds a Supplier that will generate identical arrays of new StatsCollectors. * * @param schema The Schema being used. - * @param request The AnalyticsRequest to generate a StatsCollector[] from. + * @param exRequests The expression requests to generate a StatsCollector[] from. * @return A Supplier that will return an array of new StatsCollector. */ @SuppressWarnings("unchecked") - public static Supplier create(IndexSchema schema, AnalyticsRequest request) { - final Map> collectorStats = new HashMap<>(); - final Map> collectorPercs = new HashMap<>(); - final Map collectorSources = new HashMap<>(); + public static Supplier create(IndexSchema schema, List exRequests ) { + final Map> collectorStats = new TreeMap<>(); + final Map> collectorPercs = new TreeMap<>(); + final Map collectorSources = new TreeMap<>(); // Iterate through all expression request to make a list of ValueSource strings // and statistics that need to be calculated on those ValueSources. 
- for (ExpressionRequest expRequest : request.getExpressions()) { + for (ExpressionRequest expRequest : exRequests) { String statExpression = expRequest.getExpressionString(); Set statistics = getStatistics(statExpression); if (statistics == null) { @@ -146,7 +150,11 @@ public class StatsCollectorSupplierFactory { stats = new HashSet<>(); collectorStats.put(source, stats); } - stats.add(stat); + if(AnalyticsParams.STAT_PERCENTILE.equals(stat)) { + stats.add(stat + "_"+ arguments[0]); + } else { + stats.add(stat); + } } } String[] keys = collectorStats.keySet().toArray(new String[0]); @@ -168,7 +176,7 @@ public class StatsCollectorSupplierFactory { if (percs!=null) { collectorPercs.put(builtString, percs); } - for (ExpressionRequest er : request.getExpressions()) { + for (ExpressionRequest er : exRequests) { er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString)); } } @@ -182,6 +190,8 @@ public class StatsCollectorSupplierFactory { }; } + log.info("Stats objects: "+collectorStats.size()+" sr="+collectorSources.size()+" pr="+collectorPercs.size() ); + // All information is stored in final arrays so that nothing // has to be computed when the Supplier's get() method is called. final Set[] statsArr = collectorStats.values().toArray(new Set[0]); diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java index 6b9e0c7eb89..af30c74410a 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java @@ -68,10 +68,11 @@ public abstract class ElectionContext { public void cancelElection() throws InterruptedException, KeeperException { try { + log.info("canceling election {}",leaderSeqPath ); zkClient.delete(leaderSeqPath, -1, true); } catch (NoNodeException e) { // fine - log.warn("cancelElection did not find election node to remove"); + log.warn("cancelElection did not find election node to remove",e); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java index d1f5f96b955..88b564278fc 100644 --- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java +++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java @@ -67,6 +67,8 @@ public class LeaderElector { private volatile ElectionContext context; + private ElectionWatcher watcher; + public LeaderElector(SolrZkClient zkClient) { this.zkClient = zkClient; zkCmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout()); @@ -90,7 +92,7 @@ public class LeaderElector { // get all other numbers... final String holdElectionPath = context.electionPath + ELECTION_NODE; List seqs = zkClient.getChildren(holdElectionPath, null, true); - + sortSeqs(seqs); List intSeqs = getSeqs(seqs); if (intSeqs.size() == 0) { @@ -122,31 +124,7 @@ public class LeaderElector { return; } try { - zkClient.getData(holdElectionPath + "/" + seqs.get(index), - new Watcher() { - - @Override - public void process(WatchedEvent event) { - // session events are not change events, - // and do not remove the watcher - if (EventType.None.equals(event.getType())) { - return; - } - // am I the next leader? 
- try { - checkIfIamLeader(seq, context, true); - } catch (InterruptedException e) { - // Restore the interrupted status - Thread.currentThread().interrupt(); - log.warn("", e); - } catch (IOException e) { - log.warn("", e); - } catch (Exception e) { - log.warn("", e); - } - } - - }, null, true); + zkClient.getData(holdElectionPath + "/" + seqs.get(index), watcher = new ElectionWatcher(context.leaderSeqPath , seq, context) , null, true); } catch (KeeperException.SessionExpiredException e) { throw e; } catch (KeeperException e) { @@ -290,6 +268,50 @@ public class LeaderElector { return seq; } + + private class ElectionWatcher implements Watcher { + final String leaderSeqPath; + final int seq; + final ElectionContext context; + + private boolean canceled = false; + + private ElectionWatcher(String leaderSeqPath, int seq, ElectionContext context) { + this.leaderSeqPath = leaderSeqPath; + this.seq = seq; + this.context = context; + } + + void cancel(String leaderSeqPath){ + canceled = true; + + } + + @Override + public void process(WatchedEvent event) { + // session events are not change events, + // and do not remove the watcher + if (EventType.None.equals(event.getType())) { + return; + } + if(canceled) { + log.info("This watcher is not active anymore {}", leaderSeqPath); + return; + } + try { + // am I the next leader? + checkIfIamLeader(seq, context, true); + } catch (InterruptedException e) { + // Restore the interrupted status + Thread.currentThread().interrupt(); + log.warn("", e); + } catch (IOException e) { + log.warn("", e); + } catch (Exception e) { + log.warn("", e); + } + } + } /** * Set up any ZooKeeper nodes needed for leader election. @@ -317,6 +339,8 @@ public class LeaderElector { } void retryElection() throws KeeperException, InterruptedException, IOException { context.cancelElection(); + ElectionWatcher watcher = this.watcher; + if(watcher!= null) watcher.cancel(context.leaderSeqPath); joinElection(context, true); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 5e18a58fd39..94b9e0bec1a 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -70,6 +70,7 @@ public class Overseer { public static final String ADD_ROUTING_RULE = "addroutingrule"; public static final String REMOVE_ROUTING_RULE = "removeroutingrule"; public static final String STATE = "state"; + public static final String QUIT = "quit"; public static final int STATE_UPDATE_DELAY = 1500; // delay between cloud state updates public static final String CREATESHARD = "createshard"; @@ -200,85 +201,132 @@ public class Overseer { } log.info("Starting to work on the main queue"); - while (!this.isClosed) { - isLeader = amILeader(); - if (LeaderStatus.NO == isLeader) { - break; - } - else if (LeaderStatus.YES != isLeader) { - log.debug("am_i_leader unclear {}", isLeader); - continue; // not a no, not a yes, try ask again - } - DistributedQueue.QueueEvent head = null; - try { - head = stateUpdateQueue.peek(true); - } catch (KeeperException e) { - if (e.code() == KeeperException.Code.SESSIONEXPIRED) { - log.warn( - "Solr cannot talk to ZK, exiting Overseer main queue loop", e); - return; + try { + while (!this.isClosed) { + isLeader = amILeader(); + if (LeaderStatus.NO == isLeader) { + break; } - log.error("Exception in Overseer main queue loop", e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - return; - - } catch (Exception e) { - 
log.error("Exception in Overseer main queue loop", e); - } - synchronized (reader.getUpdateLock()) { + else if (LeaderStatus.YES != isLeader) { + log.debug("am_i_leader unclear {}", isLeader); + continue; // not a no, not a yes, try ask again + } + DistributedQueue.QueueEvent head = null; try { - reader.updateClusterState(true); - ClusterState clusterState = reader.getClusterState(); - - while (head != null) { - final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); - final String operation = message.getStr(QUEUE_OPERATION); - final TimerContext timerContext = stats.time(operation); - try { - clusterState = processMessage(clusterState, message, operation); - stats.success(operation); - } catch (Exception e) { - // generally there is nothing we can do - in most cases, we have - // an issue that will fail again on retry or we cannot communicate with - // ZooKeeper in which case another Overseer should take over - // TODO: if ordering for the message is not important, we could - // track retries and put it back on the end of the queue - log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e); - stats.error(operation); - } finally { - timerContext.stop(); - } - workQueue.offer(head.getBytes()); - - stateUpdateQueue.poll(); - - if (System.nanoTime() - lastUpdatedTime > TimeUnit.NANOSECONDS.convert(STATE_UPDATE_DELAY, TimeUnit.MILLISECONDS)) break; - - // if an event comes in the next 100ms batch it together - head = stateUpdateQueue.peek(100); - } - lastUpdatedTime = System.nanoTime(); - zkClient.setData(ZkStateReader.CLUSTER_STATE, - ZkStateReader.toJSON(clusterState), true); - // clean work queue - while (workQueue.poll() != null) ; - + head = stateUpdateQueue.peek(true); } catch (KeeperException e) { if (e.code() == KeeperException.Code.SESSIONEXPIRED) { - log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e); + log.warn( + "Solr cannot talk to ZK, exiting Overseer main queue loop", e); return; } log.error("Exception in Overseer main queue loop", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return; - + } catch (Exception e) { log.error("Exception in Overseer main queue loop", e); } + synchronized (reader.getUpdateLock()) { + try { + reader.updateClusterState(true); + ClusterState clusterState = reader.getClusterState(); + + while (head != null) { + final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); + final String operation = message.getStr(QUEUE_OPERATION); + final TimerContext timerContext = stats.time(operation); + try { + clusterState = processMessage(clusterState, message, operation); + stats.success(operation); + } catch (Exception e) { + // generally there is nothing we can do - in most cases, we have + // an issue that will fail again on retry or we cannot communicate with + // ZooKeeper in which case another Overseer should take over + // TODO: if ordering for the message is not important, we could + // track retries and put it back on the end of the queue + log.error("Overseer could not process the current clusterstate state update message, skipping the message.", e); + stats.error(operation); + } finally { + timerContext.stop(); + } + workQueue.offer(head.getBytes()); + + stateUpdateQueue.poll(); + + if (isClosed || System.nanoTime() - lastUpdatedTime > TimeUnit.NANOSECONDS.convert(STATE_UPDATE_DELAY, TimeUnit.MILLISECONDS)) break; + + // if an event comes in the next 100ms batch it together + head = stateUpdateQueue.peek(100); + } + lastUpdatedTime = 
System.nanoTime(); + zkClient.setData(ZkStateReader.CLUSTER_STATE, + ZkStateReader.toJSON(clusterState), true); + // clean work queue + while (workQueue.poll() != null) ; + + } catch (KeeperException e) { + if (e.code() == KeeperException.Code.SESSIONEXPIRED) { + log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e); + return; + } + log.error("Exception in Overseer main queue loop", e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + + } catch (Exception e) { + log.error("Exception in Overseer main queue loop", e); + } + } + } - + } finally { + log.info("Overseer Loop exiting : {}", LeaderElector.getNodeName(myId)); + new Thread("OverseerExitThread"){ + //do this in a separate thread because any wait is interrupted in this main thread + @Override + public void run() { + checkIfIamStillLeader(); + } + }.start(); + } + } + + private void checkIfIamStillLeader() { + org.apache.zookeeper.data.Stat stat = new org.apache.zookeeper.data.Stat(); + String path = "/overseer_elect/leader"; + byte[] data = null; + try { + data = zkClient.getData(path, null, stat, true); + } catch (Exception e) { + log.error("could not read the data" ,e); + return; + } + Map m = (Map) ZkStateReader.fromJSON(data); + String id = (String) m.get("id"); + if(overseerCollectionProcessor.getId().equals(id)){ + try { + log.info("I'm exiting , but I'm still the leader"); + zkClient.delete(path,stat.getVersion(),true); + } catch (KeeperException.BadVersionException e) { + //no problem ignore it some other Overseer has already taken over + } catch (Exception e) { + log.error("Could not delete my leader node ", e); + } finally { + try { + if(zkController !=null && !zkController.getCoreContainer().isShutDown()){ + zkController.rejoinOverseerElection(); + } + + } catch (Exception e) { + log.error("error canceling overseer election election ",e); + } + } + + } else{ + log.info("somebody else has already taken up the overseer position"); } } @@ -324,7 +372,11 @@ public class Overseer { clusterState = removeRoutingRule(clusterState, message); } else if(CLUSTERPROP.isEqual(operation)){ handleProp(message); - } else { + } else if( QUIT.equals(operation)){ + log.info("Quit command received {}", LeaderElector.getNodeName(myId)); + overseerCollectionProcessor.close(); + close(); + } else{ throw new RuntimeException("unknown operation:" + operation + " contents:" + message.getProperties()); } @@ -1107,15 +1159,18 @@ public class Overseer { private String adminPath; - private OverseerCollectionProcessor ocp; + private OverseerCollectionProcessor overseerCollectionProcessor; + + private ZkController zkController; private Stats stats; // overseer not responsible for closing reader - public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader) throws KeeperException, InterruptedException { + public Overseer(ShardHandler shardHandler, String adminPath, final ZkStateReader reader, ZkController zkController) throws KeeperException, InterruptedException { this.reader = reader; this.shardHandler = shardHandler; this.adminPath = adminPath; + this.zkController = zkController; this.stats = new Stats(); } @@ -1130,8 +1185,8 @@ public class Overseer { ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process."); - ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath, stats); - ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id); + overseerCollectionProcessor = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath, 
stats); + ccThread = new OverseerThread(ccTg, overseerCollectionProcessor, "Overseer-" + id); ccThread.setDaemon(true); updaterThread.start(); diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index a07acdb7816..f4abda8d5b9 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -88,6 +88,8 @@ import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP; import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP; import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA; import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.CLUSTERSTATUS; +import static org.apache.solr.common.params.CollectionParams.CollectionAction.LIST; import static org.apache.solr.common.params.CollectionParams.CollectionAction.OVERSEERSTATUS; import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE; @@ -136,7 +138,6 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { public static final String COLL_PROP_PREFIX = "property."; - public static final Set KNOWN_CLUSTER_PROPS = ImmutableSet.of(ZkStateReader.LEGACY_CLOUD, ZkStateReader.URL_SCHEME); public static final Map COLL_PROPS = ZkNodeProps.makeMap( @@ -220,6 +221,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } QueueEvent head = workQueue.peek(true); + if(isClosed) break; final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); final String asyncId = (message.containsKey(ASYNC) && message.get(ASYNC) != null) ? 
(String) message.get(ASYNC) : null; @@ -287,7 +289,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } private void prioritizeOverseerNodes() throws KeeperException, InterruptedException { - log.info("prioritizing overseer nodes"); + log.info("prioritizing overseer nodes at {}", LeaderElector.getNodeName(myId)); SolrZkClient zk = zkStateReader.getZkClient(); if(!zk.exists(ZkStateReader.ROLES,true))return; Map m = (Map) ZkStateReader.fromJSON(zk.getData(ZkStateReader.ROLES, null, new Stat(), true)); @@ -299,6 +301,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { List nodeNames = getSortedOverseerNodeNames(zk); if(nodeNames.size()<2) return; + boolean designateIsInFront = overseerDesignates.contains( nodeNames.get(0)); // ArrayList nodesTobePushedBack = new ArrayList<>(); @@ -306,25 +309,25 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { List availableDesignates = new ArrayList<>(); log.info("sorted nodes {}", nodeNames);//TODO to be removed - for (int i = 0; i < nodeNames.size(); i++) { + for (int i = 1; i < nodeNames.size(); i++) { String s = nodeNames.get(i); if (overseerDesignates.contains(s)) { availableDesignates.add(s); - for(int j=0;j1) break; + if(availableDesignates.size()>1) break;//we don't need to line up more than 2 designates } if(!availableDesignates.isEmpty()){ - for (int i = nodesTobePushedBack.size() - 1; i >= 0; i--) { - String s = nodesTobePushedBack.get(i); + for (String s : nodesTobePushedBack) { log.info("pushing back {} ", s); invokeOverseerOp(s, "rejoin"); } @@ -358,18 +361,22 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { log.warn("available designates and current state {} {} ", availableDesignates, getSortedOverseerNodeNames(zk)); } - } else { + } else if(!designateIsInFront) { log.warn("No overseer designates are available, overseerDesignates: {}, live nodes : {}",overseerDesignates,nodeNames); return; } String leaderNode = getLeaderNode(zkStateReader.getZkClient()); if(leaderNode ==null) return; - if(!overseerDesignates.contains(leaderNode) && !availableDesignates.isEmpty()){ - //this means there are designated Overseer nodes and I am not one of them , kill myself - String newLeader = availableDesignates.get(0); - log.info("I am not an overseerdesignate , forcing a new leader {} ", newLeader); - invokeOverseerOp(newLeader, "leader"); + if(!overseerDesignates.contains(leaderNode) ){ + List sortedNodes = getSortedOverseerNodeNames(zk); + + if(leaderNode.equals(sortedNodes.get(0)) || // I am leader and I am in front of the queue + overseerDesignates.contains(sortedNodes.get(0))) {// I am leader but somebody else is in the front , Screwed up leader election + //this means there are I am not a designate and the next guy is lined up to become the leader, kill myself + log.info("I am not an overseer designate , forcing myself out {} ", leaderNode); + Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, Overseer.QUIT))); + } } } @@ -470,13 +477,13 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { processRoleCommand(message, operation); } else if (ADDREPLICA.isEqual(operation)) { addReplica(zkStateReader.getClusterState(), message, results); - } else if (REQUESTSTATUS.equals(operation)) { - requestStatus(message, results); } else if (OVERSEERSTATUS.isEqual(operation)) { getOverseerStatus(message, results); - } - - else { + } else 
if(LIST.isEqual(operation)) { + listCollections(zkStateReader.getClusterState(), results); + } else if (CLUSTERSTATUS.isEqual(operation)) { + getClusterStatus(zkStateReader.getClusterState(), message, results); + } else { throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown operation:" + operation); } @@ -567,6 +574,131 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } + private void getClusterStatus(ClusterState clusterState, ZkNodeProps message, NamedList results) { + String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + + // read aliases + Aliases aliases = zkStateReader.getAliases(); + Map> collectionVsAliases = new HashMap<>(); + Map aliasVsCollections = aliases.getCollectionAliasMap(); + if (aliasVsCollections != null) { + for (Map.Entry entry : aliasVsCollections.entrySet()) { + List colls = StrUtils.splitSmart(entry.getValue(), ','); + String alias = entry.getKey(); + for (String coll : colls) { + if (collection == null || collection.equals(coll)) { + List list = collectionVsAliases.get(coll); + if (list == null) { + list = new ArrayList<>(); + collectionVsAliases.put(coll, list); + } + list.add(alias); + } + } + } + } + + // convert cluster state into a map of writable types + byte[] bytes = ZkStateReader.toJSON(clusterState); + Map stateMap = (Map) ZkStateReader.fromJSON(bytes); + + String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); + NamedList collectionProps = new SimpleOrderedMap(); + if (collection == null) { + Set collections = clusterState.getCollections(); + for (String name : collections) { + Map collectionStatus = getCollectionStatus(stateMap, name, shard); + if (collectionVsAliases.containsKey(name) && !collectionVsAliases.get(name).isEmpty()) { + collectionStatus.put("aliases", collectionVsAliases.get(name)); + } + collectionProps.add(name, collectionStatus); + } + } else { + String routeKey = message.getStr(ShardParams._ROUTE_); + if (routeKey == null) { + Map collectionStatus = getCollectionStatus(stateMap, collection, shard); + if (collectionVsAliases.containsKey(collection) && !collectionVsAliases.get(collection).isEmpty()) { + collectionStatus.put("aliases", collectionVsAliases.get(collection)); + } + collectionProps.add(collection, collectionStatus); + } else { + DocCollection docCollection = clusterState.getCollection(collection); + DocRouter router = docCollection.getRouter(); + Collection slices = router.getSearchSlices(routeKey, null, docCollection); + String s = ""; + for (Slice slice : slices) { + s += slice.getName() + ","; + } + if (shard != null) { + s += shard; + } + Map collectionStatus = getCollectionStatus(stateMap, collection, s); + if (collectionVsAliases.containsKey(collection) && !collectionVsAliases.get(collection).isEmpty()) { + collectionStatus.put("aliases", collectionVsAliases.get(collection)); + } + collectionProps.add(collection, collectionStatus); + } + } + + + NamedList clusterStatus = new SimpleOrderedMap<>(); + clusterStatus.add("collections", collectionProps); + + // read cluster properties + Map clusterProps = zkStateReader.getClusterProps(); + if (clusterProps != null && !clusterProps.isEmpty()) { + clusterStatus.add("properties", clusterProps); + } + + // add the alias map too + if (aliasVsCollections != null && !aliasVsCollections.isEmpty()) { + clusterStatus.add("aliases", aliasVsCollections); + } + + results.add("cluster", clusterStatus); + } + + /** + * Get collection status from cluster state. + * Can return collection status by given shard name. 
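 + * For example (an illustrative sketch only; the collection name "collection1" is assumed and is not part of this patch):
 + * getCollectionStatus(stateMap, "collection1", "shard1,shard2") returns collection1's map with its
 + * "shards" entry reduced to just shard1 and shard2, or throws BAD_REQUEST if either shard is unknown.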
+ * + * + * @param clusterState cloud state map parsed from JSON-serialized {@link ClusterState} + * @param name collection name + * @param shardStr comma separated shard names + * @return map of collection properties + */ + private Map getCollectionStatus(Map clusterState, String name, String shardStr) { + Map docCollection = (Map) clusterState.get(name); + if (docCollection == null) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + name + " not found"); + } + if (shardStr == null) { + return docCollection; + } else { + Map shards = (Map) docCollection.get("shards"); + Map selected = new HashMap<>(); + List selectedShards = Arrays.asList(shardStr.split(",")); + for (String selectedShard : selectedShards) { + if (!shards.containsKey(selectedShard)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + name + " shard: " + selectedShard + " not found"); + } + selected.put(selectedShard, shards.get(selectedShard)); + docCollection.put("shards", selected); + } + return docCollection; + } + } + + private void listCollections(ClusterState clusterState, NamedList results) { + Set collections = clusterState.getCollections(); + List collectionList = new ArrayList(); + for (String collection : collections) { + collectionList.add(collection); + } + results.add("collections", collectionList); + } + private void processRoleCommand(ZkNodeProps message, String operation) throws KeeperException, InterruptedException { SolrZkClient zkClient = zkStateReader.getZkClient(); Map roles = null; @@ -1372,40 +1504,6 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } while (srsp != null); } - private void requestStatus(ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException { - log.info("Request status invoked"); - String requestId = message.getStr(REQUESTID); - - // Special taskId (-1), clears up the request state maps. 
- if(requestId.equals("-1")) { - completedMap.clear(); - failureMap.clear(); - return; - } - - if(completedMap.contains(requestId)) { - SimpleOrderedMap success = new SimpleOrderedMap(); - success.add("state", "completed"); - success.add("msg", "found " + requestId + " in completed tasks"); - results.add("status", success); - } else if (runningMap.contains(requestId)) { - SimpleOrderedMap success = new SimpleOrderedMap(); - success.add("state", "running"); - success.add("msg", "found " + requestId + " in submitted tasks"); - results.add("status", success); - } else if (failureMap.contains(requestId)) { - SimpleOrderedMap success = new SimpleOrderedMap(); - success.add("state", "failed"); - success.add("msg", "found " + requestId + " in failed tasks"); - results.add("status", success); - } else { - SimpleOrderedMap failure = new SimpleOrderedMap(); - failure.add("state", "notfound"); - failure.add("msg", "Did not find taskid [" + requestId + "] in any tasks queue"); - results.add("status", failure); - } - } - private void deleteShard(ClusterState clusterState, ZkNodeProps message, NamedList results) { log.info("Delete shard invoked"); String collection = message.getStr(ZkStateReader.COLLECTION_PROP); @@ -2277,5 +2375,9 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } while (srsp != null); } while(true); } + String getId(){ + return myId; + } + } diff --git a/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java b/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java index dc4e76132d2..8a00887bce7 100644 --- a/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java +++ b/solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java @@ -25,11 +25,11 @@ import java.io.InputStreamReader; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Properties; import java.util.Map.Entry; -import org.apache.lucene.util.IOUtils; import org.apache.solr.common.SolrException; import org.apache.zookeeper.server.ServerConfig; import org.apache.zookeeper.server.ZooKeeperServerMain; @@ -179,7 +179,7 @@ class SolrZkServerProps extends QuorumPeerConfig { Properties cfg = new Properties(); FileInputStream in = new FileInputStream(configFile); try { - cfg.load(new InputStreamReader(in, IOUtils.CHARSET_UTF_8)); + cfg.load(new InputStreamReader(in, StandardCharsets.UTF_8)); } finally { in.close(); } @@ -461,7 +461,7 @@ class SolrZkServerProps extends QuorumPeerConfig { + " file is missing"); } - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(myIdFile), IOUtils.CHARSET_UTF_8)); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(myIdFile), StandardCharsets.UTF_8)); String myIdString; try { myIdString = br.readLine(); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java index 4190aa3f9a4..ef5c469e894 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java @@ -20,10 +20,12 @@ import org.apache.zookeeper.data.ACL; import org.xml.sax.SAXException; import javax.xml.parsers.ParserConfigurationException; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.concurrent.TimeoutException; @@ -253,7 +255,7 @@ public class 
ZkCLI { System.out.println("-" + PUT + " requires two args - the path to create and the data string"); System.exit(1); } - zkClient.create(arglist.get(0).toString(), arglist.get(1).toString().getBytes("UTF-8"), + zkClient.create(arglist.get(0).toString(), arglist.get(1).toString().getBytes(StandardCharsets.UTF_8), acl, CreateMode.PERSISTENT, true); } else if (line.getOptionValue(CMD).equals(PUT_FILE)) { List arglist = line.getArgList(); @@ -276,7 +278,7 @@ public class ZkCLI { System.exit(1); } byte [] data = zkClient.getData(arglist.get(0).toString(), null, null, true); - System.out.println(new String(data, "UTF-8")); + System.out.println(new String(data, StandardCharsets.UTF_8)); } else if (line.getOptionValue(CMD).equals(GET_FILE)) { List arglist = line.getArgList(); if (arglist.size() != 2) { diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 1bd3acfd7bd..aa57e689337 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -304,18 +304,6 @@ public final class ZkController { return leaderConflictResolveWait; } - public void forceOverSeer(){ - try { - zkClient.delete("/overseer_elect/leader",-1, true); - log.info("Forcing me to be leader {} ", getBaseUrl()); - overseerElector.getContext().runLeaderProcess(true, Overseer.STATE_UPDATE_DELAY + 100); - } catch (Exception e) { - throw new SolrException(ErrorCode.SERVER_ERROR, " Error becoming overseer ",e); - - } - - } - private void registerAllCoresAsDown( final CurrentCoreDescriptorProvider registerOnReconnect, boolean updateLastPublished) { List descriptors = registerOnReconnect @@ -558,7 +546,7 @@ public final class ZkController { adminPath = cc.getAdminPath(); overseerElector = new LeaderElector(zkClient); - this.overseer = new Overseer(shardHandler, adminPath, zkStateReader); + this.overseer = new Overseer(shardHandler, adminPath, zkStateReader,this); ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName()); overseerElector.setup(context); overseerElector.joinElection(context, false); @@ -1626,7 +1614,7 @@ public final class ZkController { return hostName + ':' + hostPort + '_' + URLEncoder.encode(trimLeadingAndTrailingSlashes(hostContext), "UTF-8"); } catch (UnsupportedEncodingException e) { - throw new IllegalStateException("JVM Does not seem to support UTF-8", e); + throw new Error("JVM Does not seem to support UTF-8", e); } } @@ -1679,4 +1667,8 @@ public final class ZkController { } } + CoreContainer getCoreContainer(){ + return cc; + } + } diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSolr.java b/solr/core/src/java/org/apache/solr/core/ConfigSolr.java index 0356698596e..dae776ddc1f 100644 --- a/solr/core/src/java/org/apache/solr/core/ConfigSolr.java +++ b/solr/core/src/java/org/apache/solr/core/ConfigSolr.java @@ -17,7 +17,6 @@ package org.apache.solr.core; * limitations under the License. 
*/ -import com.google.common.base.Charsets; import com.google.common.io.ByteStreams; import org.apache.commons.io.IOUtils; @@ -42,6 +41,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.Properties; @@ -77,19 +77,17 @@ public abstract class ConfigSolr { } public static ConfigSolr fromString(SolrResourceLoader loader, String xml) { - return fromInputStream(loader, new ByteArrayInputStream(xml.getBytes(Charsets.UTF_8))); + return fromInputStream(loader, new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))); } public static ConfigSolr fromInputStream(SolrResourceLoader loader, InputStream is) { try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ByteStreams.copy(is, baos); - String originalXml = IOUtils.toString(new ByteArrayInputStream(baos.toByteArray()), "UTF-8"); - ByteArrayInputStream dup = new ByteArrayInputStream(baos.toByteArray()); + byte[] buf = IOUtils.toByteArray(is); + String originalXml = new String(buf, StandardCharsets.UTF_8); + ByteArrayInputStream dup = new ByteArrayInputStream(buf); Config config = new Config(loader, null, new InputSource(dup), null, false); return fromConfig(config, originalXml); - } - catch (Exception e) { + } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } @@ -241,7 +239,7 @@ public abstract class ConfigSolr { } public ConfigSetService createCoreConfigService(SolrResourceLoader loader, ZkController zkController) { - if (getZkHost() != null) + if (getZkHost() != null || System.getProperty("zkRun") != null) return new CloudConfigSetService(loader, zkController); if (hasSchemaCache()) return new ConfigSetService.SchemaCaching(loader, getConfigSetBaseDirectory()); diff --git a/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java b/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java index b9776fdfa17..df5f9b1227d 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java +++ b/solr/core/src/java/org/apache/solr/core/CoreDescriptor.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.Properties; @@ -213,7 +214,7 @@ public class CoreDescriptor { try { in = new FileInputStream(propertiesFile); Properties externalProps = new Properties(); - externalProps.load(new InputStreamReader(in, "UTF-8")); + externalProps.load(new InputStreamReader(in, StandardCharsets.UTF_8)); coreProperties.putAll(externalProps); } catch (IOException e) { String message = String.format(Locale.ROOT, "Could not load properties from %s: %s:", diff --git a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java index 104effbe6fb..e2a59429ac6 100644 --- a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java +++ b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java @@ -17,7 +17,6 @@ package org.apache.solr.core; * limitations under the License. 
*/ -import com.google.common.base.Charsets; import com.google.common.collect.Lists; import org.apache.solr.common.SolrException; import org.apache.solr.util.IOUtils; @@ -31,6 +30,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Properties; @@ -79,7 +79,7 @@ public class CorePropertiesLocator implements CoresLocator { Writer os = null; try { propfile.getParentFile().mkdirs(); - os = new OutputStreamWriter(new FileOutputStream(propfile), Charsets.UTF_8); + os = new OutputStreamWriter(new FileOutputStream(propfile), StandardCharsets.UTF_8); p.store(os, "Written by CorePropertiesLocator"); } catch (IOException e) { @@ -147,7 +147,7 @@ public class CorePropertiesLocator implements CoresLocator { File instanceDir = propertiesFile.getParentFile(); Properties coreProperties = new Properties(); fis = new FileInputStream(propertiesFile); - coreProperties.load(new InputStreamReader(fis, Charsets.UTF_8)); + coreProperties.load(new InputStreamReader(fis, StandardCharsets.UTF_8)); String name = createName(coreProperties, instanceDir); return new CoreDescriptor(cc, name, instanceDir.getAbsolutePath(), coreProperties); } diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 3109fd1be6c..32a021aa755 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -99,6 +99,7 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import javax.xml.parsers.ParserConfigurationException; + import java.io.Closeable; import java.io.File; import java.io.FileNotFoundException; @@ -108,6 +109,7 @@ import java.io.InputStreamReader; import java.io.Writer; import java.lang.reflect.Constructor; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Arrays; @@ -274,7 +276,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable { if (input != null) { final InputStream is = new PropertiesInputStream(input); try { - p.load(new InputStreamReader(is, "UTF-8")); + p.load(new InputStreamReader(is, StandardCharsets.UTF_8)); String s = p.getProperty("index"); if (s != null && s.trim().length() > 0) { diff --git a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java index bb375a9acbc..bc95767e5c8 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java +++ b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java @@ -48,6 +48,7 @@ import javax.naming.Context; import javax.naming.InitialContext; import javax.naming.NamingException; import javax.naming.NoInitialContextException; + import java.io.Closeable; import java.io.File; import java.io.FileFilter; @@ -61,6 +62,7 @@ import java.net.URL; import java.net.URLClassLoader; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -93,7 +95,7 @@ public class SolrResourceLoader implements ResourceLoader,Closeable private final List waitingForCore = Collections.synchronizedList(new ArrayList()); private final List infoMBeans = Collections.synchronizedList(new ArrayList()); private final List waitingForResources = 
Collections.synchronizedList(new ArrayList()); - private static final Charset UTF_8 = Charset.forName("UTF-8"); + private static final Charset UTF_8 = StandardCharsets.UTF_8; //TODO: Solr5. Remove this completely when you obsolete putting tags in solr.xml (See Solr-4196) private final Properties coreProperties; diff --git a/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java b/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java index bd59ad80f4d..bd7052341f4 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java +++ b/solr/core/src/java/org/apache/solr/core/SolrXMLCoresLocator.java @@ -17,7 +17,6 @@ package org.apache.solr.core; * limitations under the License. */ -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; @@ -29,6 +28,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -157,7 +157,7 @@ public class SolrXMLCoresLocator implements CoresLocator { FileOutputStream fos = null; try { fos = new FileOutputStream(file); - writer = new OutputStreamWriter(fos, Charsets.UTF_8); + writer = new OutputStreamWriter(fos, StandardCharsets.UTF_8); writer.write(xml); writer.close(); logger.info("Persisted core descriptions to {}", file.getAbsolutePath()); diff --git a/solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java b/solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java index c1cd720b80c..6b751a4db8a 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java +++ b/solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java @@ -28,6 +28,7 @@ import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; + import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; @@ -37,6 +38,7 @@ import java.io.OutputStreamWriter; import java.io.StringWriter; import java.io.Writer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Properties; @@ -169,7 +171,7 @@ public class SolrXMLSerializer { tmpFile = File.createTempFile("solr", ".xml", file.getParentFile()); java.io.FileOutputStream out = new java.io.FileOutputStream(tmpFile); - Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); + Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)); try { persist(writer, solrXMLDef); } finally { diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java index 5da16a4a8ce..5298a4c9c5b 100644 --- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -26,6 +26,7 @@ import java.io.OutputStream; import java.io.Writer; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Arrays; @@ -81,8 +82,6 @@ import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static 
org.apache.lucene.util.IOUtils.CHARSET_UTF_8; - /** *

A Handler which provides a REST API for replication and serves replication requests from Slaves.
 * When running on the master, it provides the following commands
        1. Get the current replicable index version @@ -823,7 +822,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw try { final InputStream is = new PropertiesInputStream(input); Properties props = new Properties(); - props.load(new InputStreamReader(is, CHARSET_UTF_8)); + props.load(new InputStreamReader(is, StandardCharsets.UTF_8)); return props; } finally { input.close(); diff --git a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java index 3b2caf07771..65da7932737 100644 --- a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java +++ b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java @@ -26,6 +26,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.NoSuchFileException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -90,7 +91,6 @@ import org.eclipse.jetty.util.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.lucene.util.IOUtils.CHARSET_UTF_8; import static org.apache.solr.handler.ReplicationHandler.ALIAS; import static org.apache.solr.handler.ReplicationHandler.CHECKSUM; import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS; @@ -604,7 +604,7 @@ public class SnapPuller { } final IndexOutput out = dir.createOutput(REPLICATION_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE); - Writer outFile = new OutputStreamWriter(new PropertiesOutputStream(out), CHARSET_UTF_8); + Writer outFile = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8); try { props.store(outFile, "Replication details"); dir.sync(Collections.singleton(REPLICATION_PROPERTIES)); @@ -945,7 +945,7 @@ public class SnapPuller { final InputStream is = new PropertiesInputStream(input); try { - p.load(new InputStreamReader(is, CHARSET_UTF_8)); + p.load(new InputStreamReader(is, StandardCharsets.UTF_8)); } catch (Exception e) { LOG.error("Unable to load " + SnapPuller.INDEX_PROPERTIES, e); } finally { @@ -961,7 +961,7 @@ public class SnapPuller { p.put("index", tmpIdxDirName); Writer os = null; try { - os = new OutputStreamWriter(new PropertiesOutputStream(out), CHARSET_UTF_8); + os = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8); p.store(os, SnapPuller.INDEX_PROPERTIES); dir.sync(Collections.singleton(INDEX_PROPERTIES)); } catch (Exception e) { diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index d1d2f7b2d0a..46acf578892 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -211,6 +211,14 @@ public class CollectionsHandler extends RequestHandlerBase { this.handleOverseerStatus(req, rsp); break; } + case LIST: { + this.handleListAction(req, rsp); + break; + } + case CLUSTERSTATUS: { + this.handleClusterStatus(req, rsp); + break; + } default: { throw new RuntimeException("Unknown action: " + action); } @@ -257,11 +265,43 @@ public class CollectionsHandler extends RequestHandlerBase { private void handleRequestStatus(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException { log.debug("REQUESTSTATUS action invoked: " + req.getParamString()); req.getParams().required().check(REQUESTID); - 
Map props = new HashMap(); - props.put(Overseer.QUEUE_OPERATION, OverseerCollectionProcessor.REQUESTSTATUS); - props.put(REQUESTID, req.getParams().get(REQUESTID)); - ZkNodeProps m = new ZkNodeProps(props); - handleResponse(OverseerCollectionProcessor.REQUESTSTATUS, m, rsp); + + String requestId = req.getParams().get(REQUESTID); + + if (requestId.equals("-1")) { + // Special taskId (-1), clears up the request state maps. + if(requestId.equals("-1")) { + coreContainer.getZkController().getOverseerCompletedMap().clear(); + coreContainer.getZkController().getOverseerFailureMap().clear(); + return; + } + } else { + NamedList results = new NamedList<>(); + if (coreContainer.getZkController().getOverseerCompletedMap().contains(requestId)) { + SimpleOrderedMap success = new SimpleOrderedMap(); + success.add("state", "completed"); + success.add("msg", "found " + requestId + " in completed tasks"); + results.add("status", success); + } else if (coreContainer.getZkController().getOverseerRunningMap().contains(requestId)) { + SimpleOrderedMap success = new SimpleOrderedMap(); + success.add("state", "running"); + success.add("msg", "found " + requestId + " in submitted tasks"); + results.add("status", success); + } else if (coreContainer.getZkController().getOverseerFailureMap().contains(requestId)) { + SimpleOrderedMap success = new SimpleOrderedMap(); + success.add("state", "failed"); + success.add("msg", "found " + requestId + " in failed tasks"); + results.add("status", success); + } else { + SimpleOrderedMap failure = new SimpleOrderedMap(); + failure.add("state", "notfound"); + failure.add("msg", "Did not find taskid [" + requestId + "] in any tasks queue"); + results.add("status", failure); + } + SolrResponse response = new OverseerSolrResponse(results); + + rsp.getValues().addAll(response.getResponse()); + } } private void handleResponse(String operation, ZkNodeProps m, @@ -574,6 +614,36 @@ public class CollectionsHandler extends RequestHandlerBase { handleResponse(CollectionAction.ADDREPLICA.toString(), m, rsp); } + /** + * Handle cluster status request. + * Can return status per specific collection/shard or per all collections. + * + * @param req solr request + * @param rsp solr response + */ + private void handleClusterStatus(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException { + Map props = new HashMap<>(); + props.put(Overseer.QUEUE_OPERATION, CollectionAction.CLUSTERSTATUS.toLower()); + copyIfNotNull(req.getParams(), props, COLLECTION_PROP, SHARD_ID_PROP, ShardParams._ROUTE_); + handleResponse(CollectionAction.CLUSTERSTATUS.toString(), new ZkNodeProps(props), rsp); + } + + /** + * Handled list collection request. + * Do list collection request to zk host + * + * @param req solr request + * @param rsp solr response + * @throws KeeperException zk connection failed + * @throws InterruptedException connection interrupted + */ + private void handleListAction(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException { + Map props = ZkNodeProps.makeMap( + Overseer.QUEUE_OPERATION, CollectionAction.LIST.toString().toLowerCase(Locale.ROOT)); + handleResponse(CollectionAction.LIST.toString(), new ZkNodeProps(props), rsp); + } + + public static ModifiableSolrParams params(String... params) { ModifiableSolrParams msp = new ModifiableSolrParams(); for (int i=0; i * expand=true
          - * expand.rows=5
          - * expand.sort=field asc|desc
          - *
          + * expand.rows=5
          + * expand.sort=field asc|desc
          + * expand.q=*:* (optional, overrides the main query)
          + * expand.fq=type:child (optional, overrides the main filter queries)
          + * expand.field=field (mandatory if not used with the CollapsingQParserPlugin)
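          + *
          + * An illustrative request combining these parameters with the CollapsingQParserPlugin
          + * (a sketch only; the field name group_s is assumed and not part of this patch):
          + *   q=*:*&fq={!collapse field=group_s}&expand=true&expand.rows=3&expand.sort=score desc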
          **/ public class ExpandComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware { @@ -117,8 +117,26 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia return; } - String field = null; + String field = params.get(ExpandParams.EXPAND_FIELD); + if(field == null) { + List filters = rb.getFilters(); + if(filters != null) { + for(Query q : filters) { + if(q instanceof CollapsingQParserPlugin.CollapsingPostFilter) { + CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter)q; + field = cp.getField(); + } + } + } + } + + if(field == null) { + throw new IOException("Expand field is null."); + } + String sortParam = params.get(ExpandParams.EXPAND_SORT); + String[] fqs = params.getParams(ExpandParams.EXPAND_FQ); + String qs = params.get(ExpandParams.EXPAND_Q); int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5); Sort sort = null; @@ -127,20 +145,40 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort(); } - Query query = rb.getQuery(); - List filters = rb.getFilters(); - List newFilters = new ArrayList(); - for(Query q : filters) { - if(!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) { - newFilters.add(q); - } else { - CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter)q; - field = cp.getField(); + Query query = null; + if(qs == null) { + query = rb.getQuery(); + } else { + try { + QParser parser = QParser.getParser(qs, null, req); + query = parser.getQuery(); + } catch(Exception e) { + throw new IOException(e); } } - if(field == null) { - throw new IOException("Expand field is null."); + List newFilters = new ArrayList(); + + if(fqs == null) { + List filters = rb.getFilters(); + if(filters != null) { + for(Query q : filters) { + if(!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) { + newFilters.add(q); + } + } + } + } else { + try { + for (String fq : fqs) { + if (fq != null && fq.trim().length()!=0 && !fq.equals("*:*")) { + QParser fqp = QParser.getParser(fq, null, req); + newFilters.add(fqp.getQuery()); + } + } + } catch(Exception e) { + throw new IOException(e); + } } SolrIndexSearcher searcher = req.getSearcher(); diff --git a/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java b/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java index 3f8096c63a4..50f598bac7f 100644 --- a/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java +++ b/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java @@ -21,8 +21,7 @@ package org.apache.solr.internal.csv.writer; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; - -import org.apache.lucene.util.IOUtils; +import java.nio.charset.StandardCharsets; /** * Tries to guess a config based on an InputStream. 
@@ -76,7 +75,7 @@ public class CSVConfigGuesser { public CSVConfig guess() { try { // tralalal - BufferedReader bIn = new BufferedReader(new InputStreamReader(getInputStream(), IOUtils.CHARSET_UTF_8)); + BufferedReader bIn = new BufferedReader(new InputStreamReader(getInputStream(), StandardCharsets.UTF_8)); String[] lines = new String[10]; String line = null; int counter = 0; diff --git a/solr/core/src/java/org/apache/solr/rest/BaseSolrResource.java b/solr/core/src/java/org/apache/solr/rest/BaseSolrResource.java index e22f1da79c5..29fb474b178 100644 --- a/solr/core/src/java/org/apache/solr/rest/BaseSolrResource.java +++ b/solr/core/src/java/org/apache/solr/rest/BaseSolrResource.java @@ -45,13 +45,14 @@ import java.io.UnsupportedEncodingException; import java.io.Writer; import java.net.URLDecoder; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; /** * Base class of all Solr Restlet server resource classes. */ public abstract class BaseSolrResource extends ServerResource { - protected static final Charset UTF8 = Charset.forName("UTF-8"); + protected static final Charset UTF8 = StandardCharsets.UTF_8; protected static final String SHOW_DEFAULTS = "showDefaults"; private SolrCore solrCore; @@ -161,7 +162,7 @@ public abstract class BaseSolrResource extends ServerResource { binWriter.write(outputStream, solrRequest, solrResponse); } else { String charset = ContentStreamBase.getCharsetFromContentType(contentType); - Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8")) + Writer out = (charset == null) ? new OutputStreamWriter(outputStream, UTF8) : new OutputStreamWriter(outputStream, charset); out = new FastWriter(out); diff --git a/solr/core/src/java/org/apache/solr/rest/ManagedResourceStorage.java b/solr/core/src/java/org/apache/solr/rest/ManagedResourceStorage.java index e4218ca2348..e10b8a29c3e 100644 --- a/solr/core/src/java/org/apache/solr/rest/ManagedResourceStorage.java +++ b/solr/core/src/java/org/apache/solr/rest/ManagedResourceStorage.java @@ -29,6 +29,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Reader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -419,7 +420,7 @@ public abstract class ManagedResourceStorage { public static final Logger log = LoggerFactory.getLogger(ManagedResourceStorage.class); - public static final Charset UTF_8 = Charset.forName("UTF-8"); + public static final Charset UTF_8 = StandardCharsets.UTF_8; protected StorageIO storageIO; protected SolrResourceLoader loader; diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java new file mode 100644 index 00000000000..8a4bcbcec3d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java @@ -0,0 +1,349 @@ +package org.apache.solr.rest.schema.analysis; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.text.ParseException; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.synonym.SynonymFilterFactory; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.util.CharsRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceStorage.StorageIO; +import org.restlet.data.Status; +import org.restlet.resource.ResourceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * TokenFilterFactory and ManagedResource implementation for + * doing CRUD on synonyms using the REST API. + */ +public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { + + public static final Logger log = LoggerFactory.getLogger(ManagedSynonymFilterFactory.class); + + public static final String SYNONYM_MAPPINGS = "synonymMappings"; + public static final String IGNORE_CASE_INIT_ARG = "ignoreCase"; + + /** + * ManagedResource implementation for synonyms, which are so specialized that + * it makes sense to implement this class as an inner class as it has little + * application outside the SynonymFilterFactory use cases. + */ + public static class SynonymManager extends ManagedResource + implements ManagedResource.ChildResourceSupport + { + + // TODO: Maybe hold this using a SoftReference / WeakReference to + // reduce memory in case the set of synonyms is large and the JVM + // is running low on memory? + protected Map> synonymMappings; + + public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + } + + @SuppressWarnings("unchecked") + @Override + protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData) + throws SolrException + { + NamedList initArgs = (NamedList)managedInitArgs; + + String format = (String)initArgs.get("format"); + if (format != null && !"solr".equals(format)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid format "+ + format+"! 
Only 'solr' is supported."); + } + + // the default behavior is to not ignore case, + // so if not supplied, then install the default + if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) { + initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE); + } + boolean ignoreCase = getIgnoreCase(managedInitArgs); + synonymMappings = new TreeMap<>(); + if (managedData != null) { + Map storedSyns = (Map)managedData; + for (String key : storedSyns.keySet()) { + // give the nature of our JSON parsing solution, we really have + // no guarantees on what is in the file + Object mapping = storedSyns.get(key); + if (!(mapping instanceof List)) { + throw new SolrException(ErrorCode.SERVER_ERROR, + "Invalid synonym file format! Expected a list of synonyms for "+key+ + " but got "+mapping.getClass().getName()); + } + + // if we're configured to ignoreCase, then we build the mappings with all lower + List vals = (List)storedSyns.get(key); + Set sortedVals = new TreeSet<>(); + if (ignoreCase) { + for (String next : vals) { + sortedVals.add(applyCaseSetting(ignoreCase, next)); + } + } else { + sortedVals.addAll(vals); + } + + synonymMappings.put(applyCaseSetting(ignoreCase, key), sortedVals); + } + } + + log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId()); + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (!(updates instanceof Map)) { + throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, + "Unsupported data format (" + updates.getClass().getName() + "); expected a JSON object (Map)!"); + } + boolean ignoreCase = getIgnoreCase(); + boolean madeChanges = false; + Map jsonMap = (Map)updates; + for (String term : jsonMap.keySet()) { + + term = applyCaseSetting(ignoreCase, term); + + Set output = synonymMappings.get(term); + + Object val = jsonMap.get(term); + if (val instanceof String) { + String strVal = applyCaseSetting(ignoreCase, (String)val); + + if (output == null) { + output = new TreeSet<>(); + synonymMappings.put(term, output); + } + + if (output.add(strVal)) { + madeChanges = true; + } + } else if (val instanceof List) { + List vals = (List)val; + + if (output == null) { + output = new TreeSet<>(); + synonymMappings.put(term, output); + } + + for (String nextVal : vals) { + if (output.add(applyCaseSetting(ignoreCase, nextVal))) { + madeChanges = true; + } + } + + } else { + throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+ + " for "+term+"; expected single value or a JSON array!"); + } + } + + return madeChanges ? synonymMappings : null; + } + + /** + * Handles a change in the ignoreCase setting for synonyms, which requires + * a full rebuild of the synonymMappings. + */ + @Override + protected boolean updateInitArgs(NamedList updatedArgs) { + if (updatedArgs == null || updatedArgs.size() == 0) { + return false; + } + boolean currentIgnoreCase = getIgnoreCase(managedInitArgs); + boolean updatedIgnoreCase = getIgnoreCase(updatedArgs); + if (currentIgnoreCase == true && updatedIgnoreCase == false) { + throw new SolrException(ErrorCode.BAD_REQUEST, + "Changing a managed word set's ignoreCase arg from true to false is not permitted."); + } else if (currentIgnoreCase == false && updatedIgnoreCase == true) { + // ignore case policy changed ... 
rebuild the map + Map> rebuild = new TreeMap<>(); + for (String curr : synonymMappings.keySet()) { + Set newMappings = new TreeSet<>(); + for (String next : synonymMappings.get(curr)) { + newMappings.add(applyCaseSetting(updatedIgnoreCase, next)); + } + rebuild.put(applyCaseSetting(updatedIgnoreCase, curr), newMappings); + } + synonymMappings = rebuild; + } + + return super.updateInitArgs(updatedArgs); + } + + protected String applyCaseSetting(boolean ignoreCase, String str) { + return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str; + } + + public boolean getIgnoreCase() { + return getIgnoreCase(managedInitArgs); + } + + public boolean getIgnoreCase(NamedList initArgs) { + Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG); + // ignoreCase = false by default + return null == ignoreCase ? false : ignoreCase; + } + + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + SolrQueryResponse response = endpoint.getSolrResponse(); + if (childId != null) { + boolean ignoreCase = getIgnoreCase(); + String key = applyCaseSetting(ignoreCase, childId); + Set output = synonymMappings.get(key); + if (output == null) { + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); + } + response.add(key, output); + } else { + response.add(SYNONYM_MAPPINGS, buildMapToStore(synonymMappings)); + } + } + + @Override + public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { + boolean ignoreCase = getIgnoreCase(); + String key = applyCaseSetting(ignoreCase, childId); + Set output = synonymMappings.get(key); + if (output == null) + throw new SolrException(ErrorCode.NOT_FOUND, + String.format(Locale.ROOT, "%s not found in %s", key, getResourceId())); + + synonymMappings.remove(key); + storeManagedData(synonymMappings); + log.info("Removed synonym mappings for: {}", key); + } + } + + /** + * Custom SynonymMap.Parser implementation that provides synonym + * mappings from the managed JSON in this class during SynonymMap + * building. + */ + private class ManagedSynonymParser extends SynonymMap.Parser { + + SynonymManager synonymManager; + + public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) { + super(dedup, analyzer); + this.synonymManager = synonymManager; + } + + /** + * Add the managed synonyms and their mappings into the SynonymMap builder. + */ + @Override + public void parse(Reader in) throws IOException, ParseException { + for (String term : synonymManager.synonymMappings.keySet()) { + for (String mapping : synonymManager.synonymMappings.get(term)) { + add(new CharsRef(term), new CharsRef(mapping), false); + } + } + } + } + + protected SynonymFilterFactory delegate; + + public ManagedSynonymFilterFactory(Map args) { + super(args); + } + + @Override + public String getResourceId() { + return "/schema/analysis/synonyms/"+handle; + } + + protected Class getManagedResourceImplClass() { + return SynonymManager.class; + } + + /** + * Called once, during core initialization, to initialize any analysis components + * that depend on the data managed by this resource. It is important that the + * analysis component is only initialized once during core initialization so that + * text analysis is consistent, especially in a distributed environment, as we + * don't want one server applying a different set of stop words than other servers. 
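 + * <p>
 + * As a sketch of how this factory is wired in (the attribute value and handle name here
 + * are illustrative, not part of this patch): a field type in the managed schema would
 + * reference it with something like
 + * <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>, and the "managed"
 + * handle then maps to the REST endpoint /schema/analysis/synonyms/english served by the
 + * SynonymManager above.
 + * </p>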
+ */ + @SuppressWarnings("unchecked") + @Override + public void onManagedResourceInitialized(NamedList initArgs, final ManagedResource res) + throws SolrException + { + NamedList args = (NamedList)initArgs; + args.add("synonyms", getResourceId()); + args.add("expand", "false"); + args.add("format", "solr"); + + Map filtArgs = new HashMap<>(); + for (Map.Entry entry : args) { + filtArgs.put(entry.getKey(), entry.getValue().toString()); + } + // create the actual filter factory that pulls the synonym mappings + // from synonymMappings using a custom parser implementation + delegate = new SynonymFilterFactory(filtArgs) { + @Override + protected SynonymMap loadSynonyms + (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) + throws IOException, ParseException { + + ManagedSynonymParser parser = + new ManagedSynonymParser((SynonymManager)res, dedup, analyzer); + // null is safe here because there's no actual parsing done against a input Reader + parser.parse(null); + return parser.build(); + } + }; + try { + delegate.inform(res.getResourceLoader()); + } catch (IOException e) { + throw new SolrException(ErrorCode.SERVER_ERROR, e); + } + } + + @Override + public TokenStream create(TokenStream input) { + if (delegate == null) + throw new IllegalStateException(this.getClass().getName()+ + " not initialized correctly! The SynonymFilterFactory delegate was not initialized."); + + return delegate.create(input); + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/BCDIntField.java b/solr/core/src/java/org/apache/solr/schema/BCDIntField.java index 86efdf11889..17679b67b72 100644 --- a/solr/core/src/java/org/apache/solr/schema/BCDIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/BCDIntField.java @@ -66,6 +66,16 @@ public class BCDIntField extends PrimitiveFieldType { public void write(TextResponseWriter writer, String name, StorableField f) throws IOException { writer.writeInt(name,toExternal(f)); } + + @Override + public Object marshalSortValue(Object value) { + return marshalStringSortValue(value); + } + + @Override + public Object unmarshalSortValue(Object value) { + return unmarshalStringSortValue(value); + } } diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index 242de06a314..07f5089591f 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -151,6 +151,16 @@ public class BoolField extends PrimitiveFieldType { public void write(TextResponseWriter writer, String name, StorableField f) throws IOException { writer.writeBool(name, f.stringValue().charAt(0) == 'T'); } + + @Override + public Object marshalSortValue(Object value) { + return marshalStringSortValue(value); + } + + @Override + public Object unmarshalSortValue(Object value) { + return unmarshalStringSortValue(value); + } } // TODO - this can be much more efficient - use OpenBitSet or Bits diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index 13f35a8c069..bf0dc1dbfdc 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -47,7 +47,6 @@ import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; -import 
org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -278,20 +277,11 @@ public class CollationField extends FieldType { @Override public Object marshalSortValue(Object value) { - if (null == value) { - return null; - } - final BytesRef val = (BytesRef)value; - return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + return marshalBase64SortValue(value); } @Override public Object unmarshalSortValue(Object value) { - if (null == value) { - return null; - } - final String val = (String)value; - final byte[] bytes = Base64.base64ToByteArray(val); - return new BytesRef(bytes); + return unmarshalBase64SortValue(value); } } diff --git a/solr/core/src/java/org/apache/solr/schema/DateField.java b/solr/core/src/java/org/apache/solr/schema/DateField.java index faf9c4ff93f..52eac81f8f0 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateField.java @@ -247,6 +247,16 @@ public class DateField extends PrimitiveFieldType implements DateValueFieldType return getStringSort(field,reverse); } + @Override + public Object marshalSortValue(Object value) { + return marshalStringSortValue(value); + } + + @Override + public Object unmarshalSortValue(Object value) { + return unmarshalStringSortValue(value); + } + @Override public void write(TextResponseWriter writer, String name, StorableField f) throws IOException { writer.writeDate(name, toExternal(f)); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 4d4ae5a225d..32c7a63bb8f 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -48,6 +48,7 @@ import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.response.TextResponseWriter; @@ -968,4 +969,52 @@ public abstract class FieldType extends FieldProperties { public Object unmarshalSortValue(Object value) { return value; } + + /** + * Marshals a string-based field value. + */ + protected static Object marshalStringSortValue(Object value) { + if (null == value) { + return null; + } + CharsRef spare = new CharsRef(); + UnicodeUtil.UTF8toUTF16((BytesRef)value, spare); + return spare.toString(); + } + + /** + * Unmarshals a string-based field value. + */ + protected static Object unmarshalStringSortValue(Object value) { + if (null == value) { + return null; + } + BytesRef spare = new BytesRef(); + String stringVal = (String)value; + UnicodeUtil.UTF16toUTF8(stringVal, 0, stringVal.length(), spare); + return spare; + } + + /** + * Marshals a binary field value. + */ + protected static Object marshalBase64SortValue(Object value) { + if (null == value) { + return null; + } + final BytesRef val = (BytesRef)value; + return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + } + + /** + * Unmarshals a binary field value. 
+ */ + protected static Object unmarshalBase64SortValue(Object value) { + if (null == value) { + return null; + } + final String val = (String)value; + final byte[] bytes = Base64.base64ToByteArray(val); + return new BytesRef(bytes); + } } diff --git a/solr/core/src/java/org/apache/solr/schema/FloatField.java b/solr/core/src/java/org/apache/solr/schema/FloatField.java index 7e23443852d..6d8053580fa 100644 --- a/solr/core/src/java/org/apache/solr/schema/FloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/FloatField.java @@ -70,7 +70,7 @@ public class FloatField extends PrimitiveFieldType implements FloatValueFieldTyp @Override public SortField getSortField(SchemaField field,boolean reverse) { field.checkSortability(); - return new SortField(field.name,SortField.Type.FLOAT, reverse); + return new SortField(field.name, PARSER, reverse); } @Override diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java index fc44e39c32a..43a312744b3 100644 --- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java @@ -33,11 +33,13 @@ import org.w3c.dom.Document; import org.xml.sax.InputSource; import javax.xml.xpath.XPath; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -92,7 +94,7 @@ public final class ManagedIndexSchema extends IndexSchema { } } final FileOutputStream out = new FileOutputStream(managedSchemaFile); - writer = new OutputStreamWriter(out, "UTF-8"); + writer = new OutputStreamWriter(out, StandardCharsets.UTF_8); persist(writer); log.info("Upgraded to managed schema at " + managedSchemaFile.getPath()); } catch (IOException e) { @@ -132,7 +134,7 @@ public final class ManagedIndexSchema extends IndexSchema { StringWriter writer = new StringWriter(); persist(writer); - final byte[] data = writer.toString().getBytes("UTF-8"); + final byte[] data = writer.toString().getBytes(StandardCharsets.UTF_8); if (createOnly) { try { zkClient.create(managedSchemaPath, data, CreateMode.PERSISTENT, true); diff --git a/solr/core/src/java/org/apache/solr/schema/OpenExchangeRatesOrgProvider.java b/solr/core/src/java/org/apache/solr/schema/OpenExchangeRatesOrgProvider.java index c82266c391b..e6927cefef4 100644 --- a/solr/core/src/java/org/apache/solr/schema/OpenExchangeRatesOrgProvider.java +++ b/solr/core/src/java/org/apache/solr/schema/OpenExchangeRatesOrgProvider.java @@ -20,13 +20,13 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.noggit.JSONParser; import org.apache.lucene.analysis.util.ResourceLoader; -import org.apache.lucene.util.IOUtils; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.slf4j.Logger; @@ -202,7 +202,7 @@ public class OpenExchangeRatesOrgProvider implements ExchangeRateProvider { private JSONParser parser; public OpenExchangeRates(InputStream ratesStream) throws IOException { - parser = new JSONParser(new InputStreamReader(ratesStream, IOUtils.CHARSET_UTF_8)); + parser = new JSONParser(new InputStreamReader(ratesStream, 
StandardCharsets.UTF_8)); rates = new HashMap<>(); int ev; diff --git a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java index 3e495747730..966e0954749 100644 --- a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java +++ b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java @@ -78,6 +78,7 @@ public class PreAnalyzedField extends FieldType { parser = new JsonPreAnalyzedParser(); } } + args.remove(PARSER_IMPL); } } diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index e39d7e6ad64..9fc4320eda1 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -29,8 +29,6 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -86,23 +84,12 @@ public class StrField extends PrimitiveFieldType { @Override public Object marshalSortValue(Object value) { - if (null == value) { - return null; - } - CharsRef spare = new CharsRef(); - UnicodeUtil.UTF8toUTF16((BytesRef)value, spare); - return spare.toString(); + return marshalStringSortValue(value); } @Override public Object unmarshalSortValue(Object value) { - if (null == value) { - return null; - } - BytesRef spare = new BytesRef(); - String stringVal = (String)value; - UnicodeUtil.UTF16toUTF8(stringVal, 0, stringVal.length(), spare); - return spare; + return unmarshalStringSortValue(value); } } diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index f0741f51445..68c740dbc57 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -23,9 +23,7 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.QueryBuilder; -import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -170,22 +168,11 @@ public class TextField extends FieldType { @Override public Object marshalSortValue(Object value) { - if (null == value) { - return null; - } - CharsRef spare = new CharsRef(); - UnicodeUtil.UTF8toUTF16((BytesRef)value, spare); - return spare.toString(); + return marshalStringSortValue(value); } @Override public Object unmarshalSortValue(Object value) { - if (null == value) { - return null; - } - BytesRef spare = new BytesRef(); - String stringVal = (String)value; - UnicodeUtil.UTF16toUTF8(stringVal, 0, stringVal.length(), spare); - return spare; + return unmarshalStringSortValue(value); } } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java index 0a652efb44c..e92d601f437 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java @@ -83,6 +83,16 @@ public class TrieDateField extends 
DateField implements DateValueFieldType { return wrappedField.getSortField(field, top); } + @Override + public Object marshalSortValue(Object value) { + return value; + } + + @Override + public Object unmarshalSortValue(Object value) { + return value; + } + @Override public ValueSource getValueSource(SchemaField field, QParser parser) { return wrappedField.getValueSource(field, parser); diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index 0f6d9e57c08..601790c41f5 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -443,6 +443,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { private int nullDoc; private FloatArrayList nullScores; private IntOpenHashSet boostDocs; + private int[] boostOrds; public CollapsingScoreCollector(int maxDoc, int segments, @@ -455,11 +456,19 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.boostDocs = boostDocs; if(this.boostDocs != null) { //Set the elevated docs now. + IntOpenHashSet boostG = new IntOpenHashSet(); Iterator it = this.boostDocs.iterator(); while(it.hasNext()) { IntCursor cursor = it.next(); - this.collapsedSet.set(cursor.value); + int i = cursor.value; + this.collapsedSet.set(i); + int ord = values.getOrd(i); + if(ord > -1) { + boostG.add(ord); + } } + boostOrds = boostG.toArray(); + Arrays.sort(boostOrds); } this.values = values; int valueCount = values.getValueCount(); @@ -489,6 +498,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { public void collect(int docId) throws IOException { int globalDoc = docId+this.docBase; int ord = values.getOrd(globalDoc); + if(ord > -1) { float score = scorer.score(); if(score > scores[ord]) { @@ -520,6 +530,12 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.collapsedSet.set(nullDoc); } + if(this.boostOrds != null) { + for(int i=0; i -1) { @@ -539,6 +555,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { while((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int ord = values.getOrd(docId); + if(ord > -1) { dummy.score = scores[ord]; } else if(this.boostDocs != null && boostDocs.contains(docId)) { @@ -600,14 +617,14 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.needsScores = needsScores; this.boostDocs = boostDocs; if(funcQuery != null) { - this.fieldValueCollapse = new ValueSourceCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs, funcQuery, searcher); + this.fieldValueCollapse = new ValueSourceCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs, funcQuery, searcher, values); } else { if(fieldType instanceof TrieIntField) { - this.fieldValueCollapse = new IntValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new IntValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs, values); } else if(fieldType instanceof TrieLongField) { - this.fieldValueCollapse = new LongValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new LongValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs, values); } else if(fieldType instanceof TrieFloatField) { - this.fieldValueCollapse = new 
FloatValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs); + this.fieldValueCollapse = new FloatValueCollapse(maxDoc, field, nullPolicy, new int[valueCount], max, this.needsScores, boostDocs, values); } else { throw new IOException("min/max must be either TrieInt, TrieLong or TrieFloat."); } @@ -696,6 +713,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { protected float[] scores; protected FixedBitSet collapsedSet; protected IntOpenHashSet boostDocs; + protected int[] boostOrds; protected int nullDoc = -1; protected boolean needsScores; protected boolean max; @@ -709,7 +727,8 @@ public class CollapsingQParserPlugin extends QParserPlugin { int nullPolicy, boolean max, boolean needsScores, - IntOpenHashSet boostDocs) { + IntOpenHashSet boostDocs, + SortedDocValues values) { this.field = field; this.nullPolicy = nullPolicy; this.max = max; @@ -717,11 +736,19 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.collapsedSet = new FixedBitSet(maxDoc); this.boostDocs = boostDocs; if(this.boostDocs != null) { + IntOpenHashSet boostG = new IntOpenHashSet(); Iterator it = boostDocs.iterator(); while(it.hasNext()) { IntCursor cursor = it.next(); - this.collapsedSet.set(cursor.value); + int i = cursor.value; + this.collapsedSet.set(i); + int ord = values.getOrd(i); + if(ord > -1) { + boostG.add(ord); + } } + this.boostOrds = boostG.toArray(); + Arrays.sort(this.boostOrds); } } @@ -730,6 +757,12 @@ public class CollapsingQParserPlugin extends QParserPlugin { this.collapsedSet.set(nullDoc); } + if(this.boostOrds != null) { + for(int i=0; i -1) { @@ -770,8 +803,8 @@ public class CollapsingQParserPlugin extends QParserPlugin { int[] ords, boolean max, boolean needsScores, - IntOpenHashSet boostDocs) throws IOException { - super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); + IntOpenHashSet boostDocs, SortedDocValues values) throws IOException { + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs, values); this.ords = ords; this.ordVals = new int[ords.length]; Arrays.fill(ords, -1); @@ -838,8 +871,8 @@ public class CollapsingQParserPlugin extends QParserPlugin { int[] ords, boolean max, boolean needsScores, - IntOpenHashSet boostDocs) throws IOException { - super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); + IntOpenHashSet boostDocs, SortedDocValues values) throws IOException { + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs, values); this.ords = ords; this.ordVals = new long[ords.length]; Arrays.fill(ords, -1); @@ -907,8 +940,8 @@ public class CollapsingQParserPlugin extends QParserPlugin { int[] ords, boolean max, boolean needsScores, - IntOpenHashSet boostDocs) throws IOException { - super(maxDoc, field, nullPolicy, max, needsScores, boostDocs); + IntOpenHashSet boostDocs, SortedDocValues values) throws IOException { + super(maxDoc, field, nullPolicy, max, needsScores, boostDocs, values); this.ords = ords; this.ordVals = new float[ords.length]; Arrays.fill(ords, -1); @@ -982,8 +1015,8 @@ public class CollapsingQParserPlugin extends QParserPlugin { boolean max, boolean needsScores, IntOpenHashSet boostDocs, - FunctionQuery funcQuery, IndexSearcher searcher) throws IOException { - super(maxDoc, null, nullPolicy, max, needsScores, boostDocs); + FunctionQuery funcQuery, IndexSearcher searcher, SortedDocValues values) throws IOException { + super(maxDoc, null, nullPolicy, max, needsScores, boostDocs, values); this.valueSource = funcQuery.getValueSource(); this.rcontext = 
ValueSource.newContext(searcher); this.ords = ords; diff --git a/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java b/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java index a2c1acc2e75..0f8c14c40a6 100644 --- a/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java +++ b/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java @@ -22,7 +22,6 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.RequestHandlerUtils; @@ -39,6 +38,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.*; /** @@ -252,7 +252,7 @@ public class FileFloatSource extends ValueSource { return vals; } - BufferedReader r = new BufferedReader(new InputStreamReader(is, IOUtils.CHARSET_UTF_8)); + BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String idName = ffs.keyField.getName(); FieldType idType = ffs.keyField.getType(); diff --git a/solr/core/src/java/org/apache/solr/servlet/BaseSolrFilter.java b/solr/core/src/java/org/apache/solr/servlet/BaseSolrFilter.java new file mode 100644 index 00000000000..966bad2916f --- /dev/null +++ b/solr/core/src/java/org/apache/solr/servlet/BaseSolrFilter.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.servlet; + +import javax.servlet.Filter; + +/** + * All Solr filters available to the user's webapp should + * extend this class and not just implement {@link Filter}. + * This class ensures that the logging configuration is correct + * before any Solr specific code is executed. + */ +abstract class BaseSolrFilter implements Filter { + + static { + CheckLoggingConfiguration.check(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/servlet/BaseSolrServlet.java b/solr/core/src/java/org/apache/solr/servlet/BaseSolrServlet.java new file mode 100644 index 00000000000..3a1f88ac333 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/servlet/BaseSolrServlet.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.servlet; + +import javax.servlet.http.HttpServlet; + +/** + * All Solr servlets available to the user's webapp should + * extend this class and not {@link HttpServlet}. + * This class ensures that the logging configuration is correct + * before any Solr specific code is executed. + */ +@SuppressWarnings("serial") +abstract class BaseSolrServlet extends HttpServlet { + + static { + CheckLoggingConfiguration.check(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/servlet/CheckLoggingConfiguration.java b/solr/core/src/java/org/apache/solr/servlet/CheckLoggingConfiguration.java new file mode 100644 index 00000000000..bd8842cfc74 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/servlet/CheckLoggingConfiguration.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.servlet; + +import org.slf4j.LoggerFactory; + +final class CheckLoggingConfiguration { + + static void check() { + try { + LoggerFactory.getLogger(CheckLoggingConfiguration.class); + } catch (NoClassDefFoundError e) { + throw new NoClassDefFoundError("Failed to initialize Apache Solr: " + +"Could not find necessary SLF4j logging jars. If using Jetty, the SLF4j logging jars need to go in " + +"the jetty lib/ext directory. For other containers, the corresponding directory should be used. 
" + +"For more information, see: http://wiki.apache.org/solr/SolrLogging"); + } + } + + private CheckLoggingConfiguration() {} + +} diff --git a/solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java b/solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java index beb95aa92a4..157209737dc 100644 --- a/solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java +++ b/solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java @@ -21,8 +21,8 @@ import java.io.InputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -37,7 +37,7 @@ import org.apache.solr.core.SolrCore; * * @since solr 4.0 */ -public final class LoadAdminUiServlet extends HttpServlet { +public final class LoadAdminUiServlet extends BaseSolrServlet { @Override public void doGet(HttpServletRequest request, @@ -51,7 +51,7 @@ public final class LoadAdminUiServlet extends HttpServlet { try { response.setCharacterEncoding("UTF-8"); response.setContentType("text/html"); - Writer out = new OutputStreamWriter(response.getOutputStream(), "UTF-8"); + Writer out = new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8); String html = IOUtils.toString(in, "UTF-8"); Package pack = SolrCore.class.getPackage(); diff --git a/solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java b/solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java index bc497461760..4661f82b3c6 100644 --- a/solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java +++ b/solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java @@ -21,14 +21,13 @@ import java.io.IOException; import javax.servlet.ServletConfig; import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; /** * A Simple redirection servlet to help us deprecate old UI elements */ -public class RedirectServlet extends HttpServlet{ +public class RedirectServlet extends BaseSolrServlet { static final String CONTEXT_KEY = "${context}"; diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java index ad1fa3755b1..efee05ea3c0 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java @@ -73,7 +73,6 @@ import org.apache.solr.util.FastWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.servlet.Filter; import javax.servlet.FilterChain; import javax.servlet.FilterConfig; import javax.servlet.ServletException; @@ -81,6 +80,7 @@ import javax.servlet.ServletRequest; import javax.servlet.ServletResponse; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -89,6 +89,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URL; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -105,13 +106,12 @@ import java.util.Set; * * @since solr 1.2 */ -public class SolrDispatchFilter implements Filter -{ +public class SolrDispatchFilter extends 
BaseSolrFilter { private static final String CONNECTION_HEADER = "Connection"; private static final String TRANSFER_ENCODING_HEADER = "Transfer-Encoding"; private static final String CONTENT_LENGTH_HEADER = "Content-Length"; - final Logger log; + static final Logger log = LoggerFactory.getLogger(SolrDispatchFilter.class); protected volatile CoreContainer cores; @@ -119,19 +119,9 @@ public class SolrDispatchFilter implements Filter protected String abortErrorMessage = null; protected final HttpClient httpClient = HttpClientUtil.createClient(new ModifiableSolrParams()); - private static final Charset UTF8 = Charset.forName("UTF-8"); + private static final Charset UTF8 = StandardCharsets.UTF_8; public SolrDispatchFilter() { - try { - log = LoggerFactory.getLogger(SolrDispatchFilter.class); - } catch (NoClassDefFoundError e) { - throw new SolrException( - ErrorCode.SERVER_ERROR, - "Could not find necessary SLF4j logging jars. If using Jetty, the SLF4j logging jars need to go in " - +"the jetty lib/ext directory. For other containers, the corresponding directory should be used. " - +"For more information, see: http://wiki.apache.org/solr/SolrLogging", - e); - } } @Override @@ -765,7 +755,7 @@ public class SolrDispatchFilter implements Filter binWriter.write(response.getOutputStream(), solrReq, solrRsp); } else { String charset = ContentStreamBase.getCharsetFromContentType(ct); - Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8")) + Writer out = (charset == null) ? new OutputStreamWriter(response.getOutputStream(), UTF8) : new OutputStreamWriter(response.getOutputStream(), charset); out = new FastWriter(out); diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java index f4eccf47cd0..f3fa91d78c8 100644 --- a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java +++ b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java @@ -27,6 +27,7 @@ import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -244,7 +245,7 @@ public class SolrRequestParsers } } }; - parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map, true); + parseFormDataContent(in, Long.MAX_VALUE, StandardCharsets.UTF_8, map, true); } catch (IOException ioe) { throw new SolrException(ErrorCode.BAD_REQUEST, ioe); } @@ -598,7 +599,7 @@ public class SolrRequestParsers // get query String from request body, using the charset given in content-type: final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType()); - final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs); + final Charset charset = (cs == null) ? 
StandardCharsets.UTF_8 : Charset.forName(cs); InputStream in = null; try { in = req.getInputStream(); diff --git a/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java b/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java index 917b5cd9c44..06718c32410 100644 --- a/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java +++ b/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java @@ -18,22 +18,18 @@ package org.apache.solr.servlet; import java.io.IOException; -import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.List; import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; -import org.noggit.CharArr; -import org.noggit.JSONWriter; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.SolrZkClient; @@ -42,6 +38,8 @@ import org.apache.solr.core.CoreContainer; import org.apache.solr.util.FastWriter; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; +import org.noggit.CharArr; +import org.noggit.JSONWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,7 +49,7 @@ import org.slf4j.LoggerFactory; * * @since solr 4.0 */ -public final class ZookeeperInfoServlet extends HttpServlet { +public final class ZookeeperInfoServlet extends BaseSolrServlet { static final Logger log = LoggerFactory.getLogger(ZookeeperInfoServlet.class); @Override @@ -96,7 +94,7 @@ public final class ZookeeperInfoServlet extends HttpServlet { response.setCharacterEncoding("UTF-8"); response.setContentType("application/json"); - Writer out = new FastWriter(new OutputStreamWriter(response.getOutputStream(), IOUtils.CHARSET_UTF_8)); + Writer out = new FastWriter(new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8)); ZKPrinter printer = new ZKPrinter(response, out, cores.getZkController(), addr); printer.detail = detail; diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java index 986fe7ec64e..07ecb4334c2 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/FileDictionaryFactory.java @@ -19,10 +19,10 @@ package org.apache.solr.spelling.suggest; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.suggest.FileDictionary; -import org.apache.lucene.util.IOUtils; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; @@ -53,7 +53,7 @@ public class FileDictionaryFactory extends DictionaryFactory { try { return new FileDictionary(new InputStreamReader( - core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8), fieldDelimiter); + core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8), fieldDelimiter); } catch (IOException e) { throw new RuntimeException(); } diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java 
b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java index 521bb94398f..8c1293a3680 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java @@ -23,6 +23,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.List; @@ -131,7 +132,7 @@ public class Suggester extends SolrSpellChecker { } else { try { dictionary = new FileDictionary(new InputStreamReader( - core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8)); + core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8)); } catch (UnsupportedEncodingException e) { // should not happen LOG.error("should not happen", e); diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FreeTextLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FreeTextLookupFactory.java index 2ec64523753..730a2ba417d 100644 --- a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FreeTextLookupFactory.java +++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/FreeTextLookupFactory.java @@ -1,9 +1,10 @@ package org.apache.solr.spelling.suggest.fst; +import java.nio.charset.StandardCharsets; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester; -import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; @@ -71,7 +72,7 @@ public class FreeTextLookupFactory extends LookupFactory { : FreeTextSuggester.DEFAULT_GRAMS; byte separator = (params.get(SEPARATOR) != null) - ? params.get(SEPARATOR).toString().getBytes(IOUtils.CHARSET_UTF_8)[0] + ? params.get(SEPARATOR).toString().getBytes(StandardCharsets.UTF_8)[0] : FreeTextSuggester.DEFAULT_SEPARATOR; return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator); diff --git a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java index 858214cf83b..5c76a9848ac 100644 --- a/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java @@ -88,5 +88,10 @@ public class CachedIndexOutput extends ReusedBufferedIndexOutput { offset += len; } } - + + @Override + public long getChecksum() throws IOException { + flush(); + return dest.getChecksum(); + } } diff --git a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java index 942dfd73f4f..605d2700955 100644 --- a/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java +++ b/solr/core/src/java/org/apache/solr/store/hdfs/NullIndexOutput.java @@ -66,5 +66,9 @@ public class NullIndexOutput extends IndexOutput { length = pos; } } - + + @Override + public long getChecksum() throws IOException { + return 0; // we don't write anything. 
+ } } diff --git a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java index 52435398c98..f4d1fcc0954 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java +++ b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.net.ConnectException; import java.util.ArrayList; import java.util.List; - import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; @@ -206,7 +205,7 @@ public class SolrCmdDistributor { void addCommit(UpdateRequest ureq, CommitUpdateCommand cmd) { if (cmd == null) return; ureq.setAction(cmd.optimize ? AbstractUpdateRequest.ACTION.OPTIMIZE - : AbstractUpdateRequest.ACTION.COMMIT, false, cmd.waitSearcher, cmd.maxOptimizeSegments, cmd.softCommit, cmd.expungeDeletes); + : AbstractUpdateRequest.ACTION.COMMIT, false, cmd.waitSearcher, cmd.maxOptimizeSegments, cmd.softCommit, cmd.expungeDeletes, cmd.openSearcher); } private void submit(Req req) { diff --git a/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java new file mode 100644 index 00000000000..a006ebbb257 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java @@ -0,0 +1,510 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import java.io.IOException; + +import org.apache.solr.common.SolrException; +import static org.apache.solr.common.SolrException.ErrorCode.*; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; + +import org.apache.solr.core.CloseHook; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.cloud.CloudDescriptor; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.DateField; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.update.DeleteUpdateCommand; +import org.apache.solr.util.DateMathParser; +import org.apache.solr.util.DefaultSolrThreadFactory; +import org.apache.solr.util.plugin.SolrCoreAware; + +import java.text.ParseException; +import java.util.Comparator; +import java.util.ArrayList; +import java.util.List; +import java.util.Collections; +import java.util.concurrent.RejectedExecutionHandler; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + *

 + * <p>
 + * Update Processor Factory for managing automatic "expiration" of documents.
 + * </p>
 + * <p>
 + * The DocExpirationUpdateProcessorFactory provides two features related
 + * to the "expiration" of documents which can be used individually, or in combination:
 + * </p>
 + * <ul>
 + *   <li>Computing expiration field values for documents from a "time to live" (TTL)</li>
 + *   <li>Periodically deleting documents from the index based on an expiration field</li>
 + * </ul>
 + * <p>
 + * Documents with expiration field values computed from a TTL can be excluded from
 + * searchers using simple date based filters relative to NOW, or completely
 + * removed from the index using the periodic delete function of this factory. Alternatively,
 + * the periodic delete function of this factory can be used to remove any document with an
 + * expiration value - even if that expiration was explicitly set without leveraging the TTL
 + * feature of this factory.
 + * </p>
 + * <p>
 + * The following configuration options are supported:
 + * </p>
 + * <ul>
 + *   <li>expirationFieldName - The name of the expiration field to use
 + *       in any operations (mandatory).</li>
 + *   <li>ttlFieldName - Name of a field this process should look
 + *       for in each document processed, defaulting to _ttl_.
 + *       If the specified field name exists in a document, the document field value
 + *       will be parsed as a {@linkplain DateMathParser Date Math Expression} relative to
 + *       NOW and the result will be added to the document using the
 + *       expirationFieldName. Use <null name="ttlFieldName"/>
 + *       to disable this feature.</li>
 + *   <li>ttlParamName - Name of an update request param this process should
 + *       look for in each request when processing document additions, defaulting to
 + *       _ttl_. If the specified param name exists in an update request,
 + *       the param value will be parsed as a {@linkplain DateMathParser Date Math Expression}
 + *       relative to NOW and the result will be used as a default for any
 + *       document included in that request that does not already have a value in the
 + *       field specified by ttlFieldName. Use
 + *       <null name="ttlParamName"/> to disable this feature.</li>
 + *   <li>autoDeletePeriodSeconds - Optional numeric value indicating how
 + *       often this factory should trigger a delete to remove documents. If this option is
 + *       used, and specifies a non-negative numeric value, a background thread will be
 + *       created that will execute recurring deleteByQuery commands using the
 + *       specified period. The delete query will remove all documents with an
 + *       expirationFieldName up to NOW.</li>
 + *   <li>autoDeleteChainName - Optional name of an
 + *       updateRequestProcessorChain to use when executing automatic deletes.
 + *       If not specified, or <null/>, the default
 + *       updateRequestProcessorChain for this collection is used.
 + *       This option is ignored unless autoDeletePeriodSeconds is configured
 + *       and is non-negative.</li>
 + * </ul>
 + * <p>
 + * For example: The configuration below will cause any document with a field named
 + * _ttl_ to have a Date field named _expire_at_ computed
 + * for it when added -- but no automatic deletion will happen.
 + * </p>
 + * <pre>
 + * <processor class="solr.processor.DocExpirationUpdateProcessorFactory">
 + *   <str name="expirationFieldName">_expire_at_</str>
 + * </processor>
 + * </pre>
 + * <p>
 + * Alternatively, in this configuration deletes will occur automatically against the
 + * _expire_at_ field every 5 minutes - but this processor will not
 + * automatically populate the _expire_at_ field using any sort of TTL expression.
 + * Only documents that were added with an explicit _expire_at_ field value
 + * will ever be deleted.
 + * </p>
 + * <pre>
 + * <processor class="solr.processor.DocExpirationUpdateProcessorFactory">
 + *   <null name="ttlFieldName"/>
 + *   <null name="ttlParamName"/>
 + *   <int name="autoDeletePeriodSeconds">300</int>
 + *   <str name="expirationFieldName">_expire_at_</str>
 + * </processor>
 + * </pre>
 + * <p>
 + * This last example shows the combination of both features using a custom
 + * ttlFieldName: Documents with a my_ttl field will
 + * have an _expire_at_ field computed, and deletes will be triggered
 + * every 5 minutes to remove documents whose
 + * _expire_at_ field value is in the past.
 + * </p>
 + * <pre>
 + * <processor class="solr.processor.DocExpirationUpdateProcessorFactory">
 + *   <int name="autoDeletePeriodSeconds">300</int>
 + *   <str name="ttlFieldName">my_ttl</str>
 + *   <null name="ttlParamName"/>
 + *   <str name="expirationFieldName">_expire_at_</str>
 + * </processor>
 + * </pre>
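 + * <p>
 + * The snippets above show only the processor element itself; in a typical setup the
 + * processor sits inside an updateRequestProcessorChain in solrconfig.xml, ahead of the
 + * run-update processor. A minimal sketch (the chain name is chosen here for illustration)
 + * might look like:
 + * </p>
 + * <pre>
 + * <updateRequestProcessorChain name="expire-docs" default="true">
 + *   <processor class="solr.processor.DocExpirationUpdateProcessorFactory">
 + *     <str name="expirationFieldName">_expire_at_</str>
 + *   </processor>
 + *   <processor class="solr.LogUpdateProcessorFactory"/>
 + *   <processor class="solr.RunUpdateProcessorFactory"/>
 + * </updateRequestProcessorChain>
 + * </pre>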
          + */ +public final class DocExpirationUpdateProcessorFactory + extends UpdateRequestProcessorFactory + implements SolrCoreAware { + + public final static Logger log = LoggerFactory.getLogger(DocExpirationUpdateProcessorFactory.class); + + private static final String DEF_TTL_KEY = "_ttl_"; + private static final String EXP_FIELD_NAME_CONF = "expirationFieldName"; + private static final String TTL_FIELD_NAME_CONF = "ttlFieldName"; + private static final String TTL_PARAM_NAME_CONF = "ttlParamName"; + private static final String DEL_CHAIN_NAME_CONF = "autoDeleteChainName"; + private static final String DEL_PERIOD_SEC_CONF = "autoDeletePeriodSeconds"; + + private SolrCore core; + private ScheduledThreadPoolExecutor executor; + + private String expireField = null; + private String ttlField = null; + private String ttlParam = null; + + private String deleteChainName = null; + private long deletePeriodSeconds = -1L; + + private SolrException confErr(final String msg) { + return confErr(msg, null); + } + private SolrException confErr(final String msg, SolrException root) { + return new SolrException(SERVER_ERROR, this.getClass().getSimpleName()+": "+msg, root); + } + private String removeArgStr(final NamedList args, final String arg, final String def, + final String errMsg) { + + if (args.indexOf(arg,0) < 0) return def; + + Object tmp = args.remove(arg); + if (null == tmp) return null; + + if (tmp instanceof String) return tmp.toString(); + + throw confErr(arg + " " + errMsg); + } + + @SuppressWarnings("unchecked") + @Override + public void init(NamedList args) { + + deleteChainName = removeArgStr(args, DEL_CHAIN_NAME_CONF, null, + "must be a or for default chain"); + + ttlField = removeArgStr(args, TTL_FIELD_NAME_CONF, DEF_TTL_KEY, + "must be a or to disable"); + ttlParam = removeArgStr(args, TTL_PARAM_NAME_CONF, DEF_TTL_KEY, + "must be a or to disable"); + + expireField = removeArgStr(args, EXP_FIELD_NAME_CONF, null, "must be a "); + if (null == expireField) { + throw confErr(EXP_FIELD_NAME_CONF + " must be configured"); + } + + Object tmp = args.remove(DEL_PERIOD_SEC_CONF); + if (null != tmp) { + if (! (tmp instanceof Number)) { + throw confErr(DEL_PERIOD_SEC_CONF + " must be an or "); + } + deletePeriodSeconds = ((Number)tmp).longValue(); + } + + super.init(args); + } + + @Override + public void inform(SolrCore core) { + this.core = core; + + if (null == core.getLatestSchema().getFieldTypeNoEx(expireField)) { + // TODO: check for managed schema and auto-add as a date field? + throw confErr(EXP_FIELD_NAME_CONF + " does not exist in schema: " + expireField); + } + + if (0 < deletePeriodSeconds) { + // validate that we have a chain we can work with + try { + Object ignored = core.getUpdateProcessingChain(deleteChainName); + } catch (SolrException e) { + throw confErr(DEL_CHAIN_NAME_CONF + " does not exist: " + deleteChainName, e); + } + // schedule recuring deletion + initDeleteExpiredDocsScheduler(core); + } + } + + private void initDeleteExpiredDocsScheduler(SolrCore core) { + executor = new ScheduledThreadPoolExecutor + (1, new DefaultSolrThreadFactory("autoExpireDocs"), + new RejectedExecutionHandler() { + public void rejectedExecution(Runnable r, ThreadPoolExecutor e) { + log.warn("Skipping execution of '{}' using '{}'", r, e); + } + }); + + core.addCloseHook(new CloseHook() { + public void postClose(SolrCore core) { + // update handler is gone, hard terminiate anything that's left. 
+ + if (executor.isTerminating()) { + log.info("Triggering hard shutdown of DocExpiration Executor"); + executor.shutdownNow(); + } + } + public void preClose(SolrCore core) { + log.info("Triggering Graceful shutdown of DocExpiration Executor"); + executor.shutdown(); + } + }); + + executor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + executor.setContinueExistingPeriodicTasksAfterShutdownPolicy(false); + // we don't want this firing right away, since the core may not be ready + final long initialDelay = deletePeriodSeconds; + // TODO: should we make initialDelay configurable + // TODO: should we make initialDelay some fraction of the period? + executor.scheduleAtFixedRate(new DeleteExpiredDocsRunnable(this), + deletePeriodSeconds, + deletePeriodSeconds, + TimeUnit.SECONDS); + + } + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next ) { + + String defaultTtl = (null == ttlParam) ? null : req.getParams().get(ttlParam); + + if (null == ttlField && null == defaultTtl) { + // nothing to do, short-circuit ourselves out of the chain. + return next; + } else { + return new TTLUpdateProcessor(defaultTtl, expireField, ttlField, next); + } + } + + private static final class TTLUpdateProcessor extends UpdateRequestProcessor { + + final String defaultTtl; + final String expireField; + final String ttlField; + public TTLUpdateProcessor(final String defaultTtl, + final String expireField, + final String ttlField, + final UpdateRequestProcessor next) { + super(next); + this.defaultTtl = defaultTtl; + this.expireField = expireField; + this.ttlField = ttlField; + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + final SolrInputDocument doc = cmd.getSolrInputDocument(); + + final String math = doc.containsKey(ttlField) + ? doc.getFieldValue(ttlField).toString() : defaultTtl; + + if (null != math) { + try { + final DateMathParser dmp = new DateMathParser(); + // TODO: should we try to accept things like "1DAY" as well as "+1DAY" ? + // How? + // 'startsWith("+")' is a bad idea because it would cause problems with + // things like "/DAY+1YEAR" + // Maybe catch ParseException and retry with "+" prepended? + doc.addField(expireField, dmp.parseMath(math)); + } catch (ParseException pe) { + throw new SolrException(BAD_REQUEST, "Can't parse ttl as date math: " + math, pe); + } + } + + super.processAdd(cmd); + } + } + + /** + *

          + * Runnable that uses the deleteChainName configured for + * this factory to execute a delete by query (using the configured + * expireField) followed by a soft commit to re-open searchers (if needed) + *

          + *

          + * This logic is all wrapped up in a new SolrRequestInfo context with + * some logging to help make it obvious this background activity is happening. + *

          + *

          + * In cloud mode, this runner only triggers deletes if + * {@link #iAmInChargeOfPeriodicDeletes} is true. + * (logging is minimal in this situation) + *
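          + *
          + * For example (illustrative commentary, not from the original patch): with an
          + * expirationFieldName of "_expire_at_", each run issues a delete-by-query of the
          + * form {!cache=false}_expire_at_:[* TO NOW] (with NOW formatted by
          + * DateField.formatExternal), followed by a soft commit with openSearcher=true so
          + * expired documents drop out of search results.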

          + * + * @see #iAmInChargeOfPeriodicDeletes + */ + private static final class DeleteExpiredDocsRunnable implements Runnable { + final DocExpirationUpdateProcessorFactory factory; + final SolrCore core; + final String deleteChainName; + final String expireField; + public DeleteExpiredDocsRunnable(final DocExpirationUpdateProcessorFactory factory) { + this.factory = factory; + this.core = factory.core; + this.deleteChainName = factory.deleteChainName; + this.expireField = factory.expireField; + } + + public void run() { + // setup the request context early so the logging (including any from + // shouldWeDoPeriodicDelete() ) includes the core context info + final SolrQueryRequest req = new LocalSolrQueryRequest + (factory.core, Collections.emptyMap()); + try { + final SolrQueryResponse rsp = new SolrQueryResponse(); + SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); + try { + + if (! factory.iAmInChargeOfPeriodicDeletes() ) { + // No-Op + return; + } + log.info("Beginning periodic deletion of expired docs"); + + UpdateRequestProcessorChain chain = core.getUpdateProcessingChain(deleteChainName); + UpdateRequestProcessor proc = chain.createProcessor(req, rsp); + if (null == proc) { + log.warn("No active processors, skipping automatic deletion " + + "of expired docs using chain: {}", deleteChainName); + return; + } + try { + DeleteUpdateCommand del = new DeleteUpdateCommand(req); + del.setQuery("{!cache=false}" + expireField + ":[* TO " + + DateField.formatExternal(SolrRequestInfo.getRequestInfo().getNOW()) + + "]"); + proc.processDelete(del); + + // TODO: should this be more configurable? + // TODO: in particular: should hard commit be optional? + CommitUpdateCommand commit = new CommitUpdateCommand(req, false); + commit.softCommit = true; + commit.openSearcher = true; + proc.processCommit(commit); + + } finally { + proc.finish(); + } + + log.info("Finished periodic deletion of expired docs"); + } catch (IOException ioe) { + log.error("IOException in periodic deletion of expired docs: " + + ioe.getMessage(), ioe); + // DO NOT RETHROW: ScheduledExecutor will suppress subsequent executions + } catch (RuntimeException re) { + log.error("Runtime error in periodic deletion of expired docs: " + + re.getMessage(), re); + // DO NOT RETHROW: ScheduledExecutor will suppress subsequent executions + } finally { + SolrRequestInfo.clearRequestInfo(); + } + } finally { + req.close(); + } + } + } + + /** + *

          + * Helper method that returns true if the Runnable managed by this factory + * should be responsible for doing periodic deletes. + *

          + *

          + * In simple standalone installations this method always returns true, + * but in cloud mode it will be true if and only if we are currently the leader + * of the (active) slice with the first name (lexicographically). + *
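          + *
          + * For example (illustrative commentary, not from the original patch): in a
          + * collection whose active slices are shard1, shard2, and shard3, only the node
          + * that is currently the leader of shard1 returns true here and triggers the
          + * periodic deletes; every other replica treats the scheduled run as a no-op.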

          + *

          + * If this method returns false, it may have also logged a message letting the user + * know why we aren't attempting periodic deletion (but it will try not to log + * this excessively) + *

          + */ + private boolean iAmInChargeOfPeriodicDeletes() { + ZkController zk = core.getCoreDescriptor().getCoreContainer().getZkController(); + + if (null == zk) return true; + + // This is a lot simpler then doing our own "leader" election across all replicas + // of all shards since: + // a) we already have a per shard leader + // b) shard names must be unique + // c) ClusterState is already being "watched" by ZkController, no additional zk hits + // d) there might be multiple instances of this factory (in multiple chains) per + // collection, so picking an ephemeral node name for our election would be tricky + + CloudDescriptor desc = core.getCoreDescriptor().getCloudDescriptor(); + String col = desc.getCollectionName(); + + List slices = new ArrayList(zk.getClusterState().getActiveSlices(col)); + Collections.sort(slices, COMPARE_SLICES_BY_NAME); + if (slices.isEmpty()) { + log.error("Collection {} has no active Slices?", col); + return false; + } + Replica firstSliceLeader = slices.get(0).getLeader(); + if (null == firstSliceLeader) { + log.warn("Slice in charge of periodic deletes for {} does not currently have a leader", + col); + return false; + } + String leaderInCharge = firstSliceLeader.getName(); + String myCoreNodeName = desc.getCoreNodeName(); + + boolean inChargeOfDeletesRightNow = leaderInCharge.equals(myCoreNodeName); + + if (previouslyInChargeOfDeletes && ! inChargeOfDeletesRightNow) { + // don't spam the logs constantly, just log when we know that we're not the guy + // (the first time -- or anytime we were, but no longer are) + log.info("Not currently in charge of periodic deletes for this collection, " + + "will not trigger delete or log again until this changes"); + } + + previouslyInChargeOfDeletes = inChargeOfDeletesRightNow; + return inChargeOfDeletesRightNow; + } + + /** @see #iAmInChargeOfPeriodicDeletes */ + private volatile boolean previouslyInChargeOfDeletes = true; + + private static final Comparator COMPARE_SLICES_BY_NAME = new Comparator() { + public int compare(Slice a, Slice b) { + return a.getName().compareTo(b.getName()); + } + }; + +} + + + diff --git a/solr/core/src/java/org/apache/solr/update/processor/MD5Signature.java b/solr/core/src/java/org/apache/solr/update/processor/MD5Signature.java index be849308c7b..02a0e2b223e 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/MD5Signature.java +++ b/solr/core/src/java/org/apache/solr/update/processor/MD5Signature.java @@ -17,6 +17,7 @@ package org.apache.solr.update.processor; */ import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -44,13 +45,7 @@ public class MD5Signature extends Signature { @Override public void add(String content) { - try { - digester.update(content.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - // won't happen - log.error("UTF-8 not supported", e); - throw new RuntimeException(e); - } + digester.update(content.getBytes(StandardCharsets.UTF_8)); } @Override diff --git a/solr/core/src/java/org/apache/solr/update/processor/RegexpBoostProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/RegexpBoostProcessor.java index 29a7acba426..c70a2431e28 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/RegexpBoostProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/RegexpBoostProcessor.java @@ -21,18 +21,18 @@ import java.io.IOException; import java.io.InputStream; import 
java.io.InputStreamReader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import org.apache.commons.io.IOUtils; +import org.apache.commons.io.IOUtils; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,7 +121,7 @@ public class RegexpBoostProcessor extends UpdateRequestProcessor { private List initBoostEntries(InputStream is) throws IOException { List newBoostEntries = new ArrayList<>(); - BufferedReader reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8"))); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); try { String line = null; while ((line = reader.readLine()) != null) { diff --git a/solr/core/src/java/org/apache/solr/update/processor/StatelessScriptUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/StatelessScriptUpdateProcessorFactory.java index f330e63c34b..c634ec47ea1 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/StatelessScriptUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/StatelessScriptUpdateProcessorFactory.java @@ -27,7 +27,6 @@ import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.*; import org.apache.solr.util.plugin.SolrCoreAware; - import org.apache.commons.lang.StringUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.FilenameUtils; @@ -41,6 +40,7 @@ import javax.script.ScriptException; import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Set; import java.util.LinkedHashSet; import java.util.ArrayList; @@ -494,7 +494,7 @@ public class StatelessScriptUpdateProcessorFactory extends UpdateRequestProcesso public Reader openReader(SolrResourceLoader resourceLoader) throws IOException { InputStream input = resourceLoader.openResource(fileName); return org.apache.lucene.util.IOUtils.getDecodingReader - (input, org.apache.lucene.util.IOUtils.CHARSET_UTF_8); + (input, StandardCharsets.UTF_8); } } } diff --git a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java index 80e54eed158..a55f6228720 100644 --- a/solr/core/src/java/org/apache/solr/util/SimplePostTool.java +++ b/solr/core/src/java/org/apache/solr/util/SimplePostTool.java @@ -18,37 +18,37 @@ package org.apache.solr.util; */ import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.ByteArrayInputStream; import java.io.InputStreamReader; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.ProtocolException; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; 
+import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.HashSet; import java.util.TimeZone; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import java.util.zip.GZIPInputStream; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; -import java.net.ProtocolException; -import java.net.URL; -import java.net.URLEncoder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -893,13 +893,7 @@ public class SimplePostTool { * @return the input stream */ public static InputStream stringToStream(String s) { - InputStream is = null; - try { - is = new ByteArrayInputStream(s.getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - fatal("Shouldn't happen: UTF-8 not supported?!?!?!"); - } - return is; + return new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8)); } /** @@ -961,10 +955,9 @@ public class SimplePostTool { /** * Takes a string as input and returns a DOM */ - public static Document makeDom(String in, String inputEncoding) throws SAXException, IOException, + public static Document makeDom(byte[] in) throws SAXException, IOException, ParserConfigurationException { - InputStream is = new ByteArrayInputStream(in - .getBytes(inputEncoding)); + InputStream is = new ByteArrayInputStream(in); Document dom = DocumentBuilderFactory.newInstance() .newDocumentBuilder().parse(is); return dom; @@ -1105,7 +1098,7 @@ public class SimplePostTool { */ protected List parseRobotsTxt(InputStream is) throws IOException { List disallows = new ArrayList<>(); - BufferedReader r = new BufferedReader(new InputStreamReader(is, "UTF-8")); + BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String l; while((l = r.readLine()) != null) { String[] arr = l.split("#"); @@ -1137,10 +1130,9 @@ public class SimplePostTool { URL extractUrl = new URL(appendParam(postUrl.toString(), "extractOnly=true")); boolean success = postData(is, null, os, type, extractUrl); if(success) { - String rawXml = os.toString("UTF-8"); - Document d = makeDom(rawXml, "UTF-8"); + Document d = makeDom(os.toByteArray()); String innerXml = getXP(d, "/response/str/text()[1]", false); - d = makeDom(innerXml, "UTF-8"); + d = makeDom(innerXml.getBytes(StandardCharsets.UTF_8)); NodeList links = getNodesFromXP(d, "/html/body//a/@href"); for(int i = 0; i < links.getLength(); i++) { String link = links.item(i).getTextContent(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/package.html b/solr/core/src/test-files/solr/collection1/conf/schema-preanalyzed.xml similarity index 53% rename from lucene/codecs/src/java/org/apache/lucene/codecs/intblock/package.html rename to solr/core/src/test-files/solr/collection1/conf/schema-preanalyzed.xml index 403ea1b55f6..a9422e313e6 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/package.html +++ b/solr/core/src/test-files/solr/collection1/conf/schema-preanalyzed.xml @@ -1,4 +1,4 @@ - + - - - - - -Intblock: base support for fixed or variable length block integer encoders - - + + + + + + + + + + + + + + + id + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml index 60281903716..ece1a8e9642 100755 --- 
a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml @@ -457,6 +457,7 @@ + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sorts.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sorts.xml index f5b711c3769..53433b0ea50 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sorts.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sorts.xml @@ -71,6 +71,94 @@ NOTE: Tests expect every field in this schema to be sortable. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -82,6 +170,16 @@ NOTE: Tests expect every field in this schema to be sortable. + + + + + + + + + + @@ -94,24 +192,76 @@ NOTE: Tests expect every field in this schema to be sortable. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -157,6 +307,94 @@ NOTE: Tests expect every field in this schema to be sortable. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-doc-expire-update-processor.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-doc-expire-update-processor.xml new file mode 100644 index 00000000000..b783a5dc62b --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-doc-expire-update-processor.xml @@ -0,0 +1,103 @@ + + + + + + + + ${solr.data.dir:} + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + ${solr.ulog.dir:} + + + + + + true + + + + + + + + + + _expire_at_tdt + + + + + + _ttl_field_ + + _expire_at_tdt + + + _ttl_field_ + + + + + + _ttl_param_ + + _expire_at_tdt + + + + + + _ttl_field_ + _ttl_param_ + _expire_at_tdt + + + _ttl_field_ + + + + + + + + 3 + eXpField_tdt + tTl_s + + + + + + + + diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java index 0b254e5a3a2..34383c60939 100644 --- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -20,6 +20,7 @@ package org.apache.solr; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -468,7 +469,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); builder.parse(new ByteArrayInputStream - (writer.toString().getBytes("UTF-8"))); + (writer.toString().getBytes(StandardCharsets.UTF_8))); req.close(); } diff --git a/solr/core/src/test/org/apache/solr/CursorPagingTest.java b/solr/core/src/test/org/apache/solr/CursorPagingTest.java index e291d8c144d..6552da7ef17 100644 --- a/solr/core/src/test/org/apache/solr/CursorPagingTest.java +++ b/solr/core/src/test/org/apache/solr/CursorPagingTest.java @@ -31,18 +31,22 @@ import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.DateField; import 
org.apache.solr.search.CursorMark; //jdoc import org.noggit.ObjectBuilder; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Collection; import java.util.Collections; +import java.util.Locale; import java.util.Map; -import java.nio.ByteBuffer; +import java.util.UUID; import org.junit.BeforeClass; import org.junit.After; @@ -56,6 +60,9 @@ public class CursorPagingTest extends SolrTestCaseJ4 { public final static String TEST_SOLRCONFIG_NAME = "solrconfig-deeppaging.xml"; /** schema.xml file name, shared with other cursor related tests */ public final static String TEST_SCHEMAXML_NAME = "schema-sorts.xml"; + /** values from enumConfig.xml */ + public static final String[] SEVERITY_ENUM_VALUES = + { "Not Available", "Low", "Medium", "High", "Critical" }; @BeforeClass public static void beforeTests() throws Exception { @@ -676,7 +683,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 { String cursorMark = CURSOR_MARK_START; int docsOnThisPage = Integer.MAX_VALUE; while (0 < docsOnThisPage) { - String json = assertJQ(req(params, + String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark)); Map rsp = (Map) ObjectBuilder.fromJSON(json); assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json, @@ -893,7 +900,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 { if (useField()) { doc.addField("str", skewed(randomUsableUnicodeString(), TestUtil.randomSimpleString(random(), 1, 1))); - } if (useField()) { int numBytes = (int) skewed(TestUtil.nextInt(random(), 20, 50), 2); @@ -901,6 +907,23 @@ public class CursorPagingTest extends SolrTestCaseJ4 { random().nextBytes(randBytes); doc.addField("bin", ByteBuffer.wrap(randBytes)); } + if (useField()) { + doc.addField("date", skewed(randomDate(), + dateWithRandomSecondOn2010_10_31_at_10_31())); + } + if (useField()) { + doc.addField("uuid", UUID.randomUUID().toString()); + } + if (useField()) { + doc.addField("currency", skewed("" + (random().nextInt() / 100.) + "," + randomCurrency(), + "" + TestUtil.nextInt(random(), 250, 320) + ",USD")); + } + if (useField()) { + doc.addField("bool", random().nextBoolean() ? "t" : "f"); + } + if (useField()) { + doc.addField("enum", randomEnumValue()); + } return doc; } @@ -939,6 +962,25 @@ public class CursorPagingTest extends SolrTestCaseJ4 { return result; } + private static String randomDate() { + return DateField.formatExternal(new Date(random().nextLong())); + } + + private static String dateWithRandomSecondOn2010_10_31_at_10_31() { + return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z", + TestUtil.nextInt(random(), 0, 59)); + } + + private static final String[] currencies = { "USD", "EUR", "NOK" }; + + public static String randomCurrency() { + return currencies[random().nextInt(currencies.length)]; + } + + private static String randomEnumValue() { + return SEVERITY_ENUM_VALUES[random().nextInt(SEVERITY_ENUM_VALUES.length)]; + } + /** * Given a list of fieldNames, builds up a random sort string which is guaranteed to * have at least 3 clauses, ending with the "id" field for tie breaking @@ -956,15 +998,16 @@ public class CursorPagingTest extends SolrTestCaseJ4 { String field = shuffledNames.get(i); // wrap in a function sometimes - if ( (!"score".equals(field)) + if ( (!"score".equals(field) && !field.contains("bcd")) && (0 == TestUtil.nextInt(random(), 0, 7)) ) { // specific function doesn't matter, just proving that we can handle the concept. 
// but we do have to be careful with non numeric fields - if (field.startsWith("str") || field.startsWith("bin")) { - field = "if(exists(" + field + "),47,83)"; - } else { + if (field.contains("float") || field.contains("double") + || field.contains("int") || field.contains("long")) { field = "abs(" + field + ")"; + } else { + field = "if(exists(" + field + "),47,83)"; } } result.append(field).append(random().nextBoolean() ? " asc, " : " desc, "); diff --git a/solr/core/src/test/org/apache/solr/TestSolrCoreProperties.java b/solr/core/src/test/org/apache/solr/TestSolrCoreProperties.java index 15ee5627780..17de9263d9b 100644 --- a/solr/core/src/test/org/apache/solr/TestSolrCoreProperties.java +++ b/solr/core/src/test/org/apache/solr/TestSolrCoreProperties.java @@ -29,6 +29,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.junit.BeforeClass; +import java.nio.charset.StandardCharsets; /** *

          Test for Loading core properties from a properties file

          @@ -63,7 +64,7 @@ public class TestSolrCoreProperties extends SolrJettyTestBase { Properties p = new Properties(); p.setProperty("foo.foo1", "f1"); p.setProperty("foo.foo2", "f2"); - Writer fos = new OutputStreamWriter(new FileOutputStream(new File(confDir, "solrcore.properties")), IOUtils.CHARSET_UTF_8); + Writer fos = new OutputStreamWriter(new FileOutputStream(new File(confDir, "solrcore.properties")), StandardCharsets.UTF_8); p.store(fos, null); IOUtils.close(fos); diff --git a/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java b/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java index 10cf2472967..58158d9a15c 100644 --- a/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java +++ b/solr/core/src/test/org/apache/solr/analysis/LegacyHTMLStripCharFilterTest.java @@ -23,6 +23,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; @@ -62,7 +63,7 @@ public class LegacyHTMLStripCharFilterTest extends BaseTokenStreamTestCase { //Some sanity checks, but not a full-fledged check public void testHTML() throws Exception { InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html"); - LegacyHTMLStripCharFilter reader = new LegacyHTMLStripCharFilter(new InputStreamReader(stream, "UTF-8")); + LegacyHTMLStripCharFilter reader = new LegacyHTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8)); StringBuilder builder = new StringBuilder(); int ch = -1; while ((ch = reader.read()) != -1){ diff --git a/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java b/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java index 7be2c339823..e59758d08bb 100644 --- a/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/AbstractAnalyticsStatsTest.java @@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -82,7 +83,7 @@ public class AbstractAnalyticsStatsTest extends SolrTestCaseJ4 { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); // never forget this! 
DocumentBuilder builder = factory.newDocumentBuilder(); - doc = builder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("UTF-8")))); + doc = builder.parse(new InputSource(new ByteArrayInputStream(response.getBytes(StandardCharsets.UTF_8)))); xPathFact = XPathFactory.newInstance(); rawResponse = response; } @@ -106,8 +107,8 @@ public class AbstractAnalyticsStatsTest extends SolrTestCaseJ4 { case DOUBLE: return Double.parseDouble(val); case FLOAT: return Float.parseFloat(val); case LONG: return Long.parseLong(val); - case STRING: return val; - case DATE: return val; + case STRING: assertTrue(rawResponse, val != null && val.length() > 0 ); return val; + case DATE: assertTrue(rawResponse, val != null && val.length() > 0 ); return val; } } catch (Exception e) { e.printStackTrace(); diff --git a/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java b/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java index 43c5f501c81..5819254b429 100644 --- a/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/expression/ExpressionTest.java @@ -20,7 +20,6 @@ package org.apache.solr.analytics.expression; import com.google.common.collect.ObjectArrays; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.LuceneTestCase.BadApple; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.analytics.AbstractAnalyticsStatsTest; @@ -36,7 +35,6 @@ import java.util.ArrayList; import java.util.Scanner; @SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Appending", "Asserting"}) -@BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5302") public class ExpressionTest extends AbstractAnalyticsStatsTest { private static final String fileName = "/analytics/requestFiles/expressions.txt"; diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java index 820a1c55872..7abd79a3ad9 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/AbstractAnalyticsFacetTest.java @@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -65,7 +66,7 @@ public class AbstractAnalyticsFacetTest extends SolrTestCaseJ4 { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); // never forget this! 
DocumentBuilder builder = factory.newDocumentBuilder(); - doc = builder.parse(new InputSource(new ByteArrayInputStream(response.getBytes("UTF-8")))); + doc = builder.parse(new InputSource(new ByteArrayInputStream(response.getBytes(StandardCharsets.UTF_8)))); xPathFact = XPathFactory.newInstance(); rawResponse = response; } diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java index 2e0b6206fba..8377ccd4822 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetExtrasTest.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; diff --git a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java index 0c28fa5af4e..12cfe37cb6a 100644 --- a/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/facet/FieldFacetTest.java @@ -18,17 +18,18 @@ package org.apache.solr.analytics.facet; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.Ignore; @SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Appending","Asserting"}) -@Ignore // failing after https://issues.apache.org/jira/browse/SOLR-5685 public class FieldFacetTest extends AbstractAnalyticsFacetTest{ static String fileName = "/analytics/requestFiles/fieldFacets.txt"; @@ -390,8 +391,13 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{ } } - assertU(commit()); - setResponse(h.query(request(fileToStringArr(FieldFacetTest.class, fileName)))); + assertU(commit()); + String[] reqFacetParamas = fileToStringArr(FieldFacetTest.class, fileName); + String[] reqParamas = new String[reqFacetParamas.length + 2]; + System.arraycopy(reqFacetParamas, 0, reqParamas, 0, reqFacetParamas.length); + reqParamas[reqFacetParamas.length] = "solr"; + reqParamas[reqFacetParamas.length+1] = "asc"; + setResponse(h.query(request(reqFacetParamas))); } @SuppressWarnings("unchecked") @@ -1063,11 +1069,18 @@ public class FieldFacetTest extends AbstractAnalyticsFacetTest{ } private void checkStddevs(ArrayList list1, ArrayList list2) { + Collections.sort(list1); + Collections.sort(list2); for (int i = 0; i) actual); + Collections.sort((List) expected); + Assert.assertEquals(mes, actual, expected); + } } diff --git a/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java b/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java index bf212a118b7..29b97d3f1a8 100644 --- a/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java +++ b/solr/core/src/test/org/apache/solr/analytics/util/valuesource/FunctionTest.java @@ -90,6 +90,7 @@ public class FunctionTest extends AbstractAnalyticsStatsTest { double result = (Double)getStatResult("ar", "sum", VAL_TYPE.DOUBLE); double calculated = (Double)getStatResult("ar", "sumc", VAL_TYPE.DOUBLE); assertEquals(getRawResponse(), result, calculated, 0.0); + // TODO checfk why asserted 2times 
assertEquals(getRawResponse(), result, calculated, 0.0); result = (Double)getStatResult("ar", "mean", VAL_TYPE.DOUBLE); @@ -170,24 +171,24 @@ public class FunctionTest extends AbstractAnalyticsStatsTest { @Test public void dateMathTest() throws Exception { - String result = (String)getStatResult("dmr", "median", VAL_TYPE.STRING); - String calculated = (String)getStatResult("dmr", "medianc", VAL_TYPE.STRING); + String result = (String)getStatResult("dmr", "median", VAL_TYPE.DATE); + String calculated = (String)getStatResult("dmr", "medianc", VAL_TYPE.DATE); assertEquals(getRawResponse(), result, calculated); - result = (String)getStatResult("dmr", "max", VAL_TYPE.STRING); - calculated = (String)getStatResult("dmr", "maxc", VAL_TYPE.STRING); + result = (String)getStatResult("dmr", "max", VAL_TYPE.DATE); + calculated = (String)getStatResult("dmr", "maxc", VAL_TYPE.DATE); assertEquals(getRawResponse(), result, calculated); } @Test public void constantDateTest() throws Exception { - String result = (String)getStatResult("cdr", "median", VAL_TYPE.STRING); - String calculated = (String)getStatResult("cdr", "medianc", VAL_TYPE.STRING); + String result = (String)getStatResult("cdr", "median", VAL_TYPE.DATE); + String calculated = (String)getStatResult("cdr", "medianc", VAL_TYPE.DATE); assertEquals(getRawResponse(), result, calculated); assertEquals(getRawResponse(), result, calculated); - result = (String)getStatResult("cdr", "max", VAL_TYPE.STRING); - calculated = (String)getStatResult("cdr", "maxc", VAL_TYPE.STRING); + result = (String)getStatResult("cdr", "max", VAL_TYPE.DATE); + calculated = (String)getStatResult("cdr", "maxc", VAL_TYPE.DATE); assertEquals(getRawResponse(), result, calculated); } diff --git a/solr/core/src/test/org/apache/solr/cloud/AsyncMigrateRouteKeyTest.java b/solr/core/src/test/org/apache/solr/cloud/AsyncMigrateRouteKeyTest.java index 959d97e4c1a..6b9a7090397 100644 --- a/solr/core/src/test/org/apache/solr/cloud/AsyncMigrateRouteKeyTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/AsyncMigrateRouteKeyTest.java @@ -53,7 +53,11 @@ public class AsyncMigrateRouteKeyTest extends MigrateRouteKeyTest { params = new ModifiableSolrParams(); params.set("action", CollectionParams.CollectionAction.REQUESTSTATUS.toString()); params.set(OverseerCollectionProcessor.REQUESTID, asyncId); - message = sendStatusRequestWithRetry(params, 10); + // This task takes long enough to run. Also check for the current state of the task to be running. + message = sendStatusRequestWithRetry(params, 2); + assertEquals("found " + asyncId + " in submitted tasks", message); + // Now wait until the task actually completes successfully/fails. 
+ message = sendStatusRequestWithRetry(params, 20); assertEquals("Task " + asyncId + " not found in completed tasks.", "found " + asyncId + " in completed tasks", message); } @@ -92,7 +96,6 @@ public class AsyncMigrateRouteKeyTest extends MigrateRouteKeyTest { if (state.equals("completed") || state.equals("failed")) return (String) status.get("msg"); - try { Thread.sleep(1000); } catch (InterruptedException e) { diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java index 9803470e25b..9f2d84ca7a8 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java @@ -75,14 +75,14 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase SolrCmdDistributor.testing_errorHook = null; } - public static String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; - public static RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; + protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; + protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; - protected String[] getFieldNames() { + public String[] getFieldNames() { return fieldNames; } - protected RandVal[] getRandValues() { + public RandVal[] getRandValues() { return randVals; } diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java index 19ae3ed13a7..40137955ad3 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java @@ -58,14 +58,14 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase { SolrCmdDistributor.testing_errorHook = null; } - public static String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; - public static RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; + protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; + protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; - protected String[] getFieldNames() { + public String[] getFieldNames() { return fieldNames; } - protected RandVal[] getRandValues() { + public RandVal[] getRandValues() { return randVals; } diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java index 8a044a5afd5..6885db951b7 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java @@ -256,7 +256,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest { // TODO: close Overseer Overseer overseer = new Overseer( - new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader); + new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader,null); overseer.close(); ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_")); diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java new file mode 100644 index 
00000000000..d532d358a61 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/DistribDocExpirationUpdateProcessorTest.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import org.apache.lucene.util.LuceneTestCase.Slow; +import org.apache.lucene.util.TestUtil; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; + +import org.apache.solr.update.processor.DocExpirationUpdateProcessorFactory; // jdoc +import org.apache.solr.update.processor.DocExpirationUpdateProcessorFactoryTest; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.HashSet; +import java.util.HashMap; + +/** Test of {@link DocExpirationUpdateProcessorFactory} in a cloud setup */ +@Slow // Has to do some sleeping to wait for a future expiration +public class DistribDocExpirationUpdateProcessorTest extends AbstractFullDistribZkTestBase { + + public static Logger log = LoggerFactory.getLogger(DistribDocExpirationUpdateProcessorTest.class); + + public DistribDocExpirationUpdateProcessorTest() { + configString = DocExpirationUpdateProcessorFactoryTest.CONFIG_XML; + schemaString = DocExpirationUpdateProcessorFactoryTest.SCHEMA_XML; + } + + @Override + protected String getCloudSolrConfig() { + return configString; + } + + @Override + public void doTest() throws Exception { + assertTrue("only one shard?!?!?!", 1 < shardToJetty.keySet().size()); + log.info("number of shards: {}", shardToJetty.keySet().size()); + + handle.clear(); + handle.put("maxScore", SKIPVAL); + handle.put("QTime", SKIPVAL); + handle.put("timestamp", SKIPVAL); + + // some docs with no expiration + for (int i = 1; i <= 100; i++) { + indexDoc(sdoc("id", i)); + } + commit(); + waitForThingsToLevelOut(30); + + // this doc better not already exist + waitForNoResults(0, params("q","id:999","rows","0","_trace","sanity_check")); + + // record the indexversion for each server so we can check later + // that it only changes for one shard + final Map initIndexVersions = getIndexVersionOfAllReplicas(); + 
assertTrue("WTF? no versions?", 0 < initIndexVersions.size()); + + + // add a doc with a short TTL + indexDoc(sdoc("id", "999", "tTl_s","+30SECONDS")); + commit(); + + // wait for one doc to be deleted + waitForNoResults(180, params("q","id:999","rows","0","_trace","did_it_expire_yet")); + + // verify only one shard changed + waitForThingsToLevelOut(30); + final Map finalIndexVersions = getIndexVersionOfAllReplicas(); + assertEquals("WTF? not same num versions?", + initIndexVersions.size(), + finalIndexVersions.size()); + + final Set nodesThatChange = new HashSet(); + final Set shardsThatChange = new HashSet(); + + int coresCompared = 0; + for (String shard : shardToJetty.keySet()) { + for (CloudJettyRunner replicaRunner : shardToJetty.get(shard)) { + coresCompared++; + + String core = replicaRunner.coreNodeName; + Long initVersion = initIndexVersions.get(core); + Long finalVersion = finalIndexVersions.get(core); + assertNotNull(shard + ": no init version for core: " + core, initVersion); + assertNotNull(shard + ": no final version for core: " + core, finalVersion); + + if (!initVersion.equals(finalVersion)) { + nodesThatChange.add(core + "("+shard+")"); + shardsThatChange.add(shard); + } + } + } + + assertEquals("Exactly one shard should have changed, instead: " + shardsThatChange + + " nodes=(" + nodesThatChange + ")", + 1, shardsThatChange.size()); + assertEquals("somehow we missed some cores?", + initIndexVersions.size(), coresCompared); + + // TODO: above logic verifies that deleteByQuery happens on all nodes, and ... + // doesn't affect searcher re-open on shards w/o expired docs ... can we also verify + // that *only* one node is sending the deletes ? + // (ie: no flood of redundent deletes?) + + } + + /** + * returns a map whose key is the coreNodeName and whose value is what the replication + * handler returns for the indexversion + */ + private Map getIndexVersionOfAllReplicas() throws IOException, SolrServerException { + Map results = new HashMap(); + + for (List listOfReplicas : shardToJetty.values()) { + for (CloudJettyRunner replicaRunner : listOfReplicas) { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("command","indexversion"); + params.set("_trace","getIndexVersion"); + params.set("qt","/replication"); + QueryRequest req = new QueryRequest(params); + + NamedList res = replicaRunner.client.solrClient.request(req); + assertNotNull("null response from server: " + replicaRunner.coreNodeName, res); + + Object version = res.get("indexversion"); + assertNotNull("null version from server: " + replicaRunner.coreNodeName, version); + assertTrue("version isn't a long: "+replicaRunner.coreNodeName, + version instanceof Long); + results.put(replicaRunner.coreNodeName, (Long)version); + + long numDocs = replicaRunner.client.solrClient.query + (params("q","*:*","distrib","false","rows","0","_trace","counting_docs")) + .getResults().getNumFound(); + log.info("core=" + replicaRunner.coreNodeName + "; ver=" + version + + "; numDocs=" + numDocs); + + } + } + + return results; + } + + /** + * Executes a query over and over against the cloudClient every 5 seconds + * until the numFound is 0 or the maxTimeLimitSeconds is exceeded. + * Query is garunteed to be executed at least once. 
+ */ + private void waitForNoResults(int maxTimeLimitSeconds, + SolrParams params) + throws SolrServerException, InterruptedException { + + final long giveUpAfter = System.currentTimeMillis() + (1000L * maxTimeLimitSeconds); + long numFound = cloudClient.query(params).getResults().getNumFound(); + while (0L < numFound && System.currentTimeMillis() < giveUpAfter) { + Thread.sleep(Math.min(5000, giveUpAfter - System.currentTimeMillis())); + numFound = cloudClient.query(params).getResults().getNumFound(); + } + assertEquals("Give up waiting for no results: " + params, + 0L, numFound); + } + +} diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java index 777a17886d0..14a9d254d8d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java @@ -39,12 +39,17 @@ import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CloudSolrServer; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CollectionParams.CollectionAction; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; +import org.apache.zookeeper.data.Stat; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; + @LuceneTestCase.Slow @SuppressSSL // Currently unknown why SSL does not work public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ @@ -85,11 +90,43 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ @Override public void doTest() throws Exception { - addOverseerRole2ExistingNodes(); + testOverseerRole(); + testQuitCommand(); } - private void addOverseerRole2ExistingNodes() throws Exception { + private void testQuitCommand() throws Exception{ + String collectionName = "testOverseerQuit"; + + createCollection(collectionName, client); + + waitForRecoveriesToFinish(collectionName, false); + + SolrZkClient zk = client.getZkStateReader().getZkClient(); + byte[] data = new byte[0]; + data = zk.getData("/overseer_elect/leader", null, new Stat(), true); + Map m = (Map) ZkStateReader.fromJSON(data); + String s = (String) m.get("id"); + String leader = LeaderElector.getNodeName(s); + Overseer.getInQueue(zk).offer(ZkStateReader.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, Overseer.QUIT))); + long timeout = System.currentTimeMillis()+5000; + String newLeader=null; + for(;System.currentTimeMillis() < timeout;){ + newLeader = OverseerCollectionProcessor.getLeaderNode(zk); + if(!newLeader.equals(leader)) break; + Thread.sleep(100); + } + assertNotSame( "Leader not changed yet",newLeader,leader); + + + + assertTrue("The old leader should have rejoined election ", OverseerCollectionProcessor.getSortedOverseerNodeNames(zk).contains(leader)); + } + + + + + private void testOverseerRole() throws Exception { String collectionName = "testOverseerCol"; createCollection(collectionName, client); @@ -202,13 +239,6 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ assertTrue("New overseer not the frontrunner : "+ getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + " expected : "+ killedOverseer, leaderchanged); - - - - - client.shutdown(); - - } private 
void setOverseerRole(CollectionAction action, String overseerDesignate) throws Exception, IOException { diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index 3ce5aac09e0..8836a7cc3a4 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -983,7 +983,7 @@ public class OverseerTest extends SolrTestCaseJ4 { overseers.get(overseers.size() -1).getZkStateReader().getZkClient().close(); } Overseer overseer = new Overseer( - new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader); + new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader,null); overseers.add(overseer); ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_")); diff --git a/solr/core/src/test/org/apache/solr/cloud/SolrXmlInZkTest.java b/solr/core/src/test/org/apache/solr/cloud/SolrXmlInZkTest.java index 397fe9fd373..45f05b0eb4b 100644 --- a/solr/core/src/test/org/apache/solr/cloud/SolrXmlInZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/SolrXmlInZkTest.java @@ -19,6 +19,7 @@ package org.apache.solr.cloud; import java.io.File; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; import org.apache.commons.io.FileUtils; import org.apache.solr.SolrTestCaseJ4; @@ -35,7 +36,6 @@ import org.junit.rules.RuleChain; import org.junit.rules.TestRule; import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; -import com.google.common.base.Charsets; public class SolrXmlInZkTest extends SolrTestCaseJ4 { @@ -84,7 +84,7 @@ public class SolrXmlInZkTest extends SolrTestCaseJ4 { zkClient = new SolrZkClient(zkServer.getZkAddress(), AbstractZkTestCase.TIMEOUT); if (toZk) { - zkClient.makePath("solr.xml", XML_FOR_ZK.getBytes(Charsets.UTF_8), true); + zkClient.makePath("solr.xml", XML_FOR_ZK.getBytes(StandardCharsets.UTF_8), true); } zkClient.close(); diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCollectionAPI.java b/solr/core/src/test/org/apache/solr/cloud/TestCollectionAPI.java new file mode 100644 index 00000000000..329ec34370f --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestCollectionAPI.java @@ -0,0 +1,241 @@ +package org.apache.solr.cloud; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import com.google.common.collect.Lists; +import org.apache.solr.client.solrj.SolrRequest; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrServer; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.CollectionParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.util.NamedList; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public class TestCollectionAPI extends AbstractFullDistribZkTestBase { + + public static final String COLLECTION_NAME = "testcollection"; + public static final String COLLECTION_NAME1 = "testcollection1"; + + public TestCollectionAPI() { + schemaString = "schema15.xml"; // we need a string id + } + + @Override + @Before + public void setUp() throws Exception { + fixShardCount = true; + sliceCount = 2; + shardCount = 2; + super.setUp(); + } + + @Override + public void doTest() throws Exception { + CloudSolrServer client = createCloudClient(null); + try { + createCollection(null, COLLECTION_NAME, 2, 1, 1, client, null, "conf1"); + createCollection(null, COLLECTION_NAME1, 1, 1, 1, client, null, "conf1"); + } finally { + //remove collections + client.shutdown(); + } + + listCollection(); + clusterStatusNoCollection(); + clusterStatusWithCollection(); + clusterStatusWithCollectionAndShard(); + clusterStatusWithRouteKey(); + clusterStatusAliasTest(); + } + + private void clusterStatusWithCollectionAndShard() throws IOException, SolrServerException { + CloudSolrServer client = createCloudClient(null); + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CLUSTERSTATUS.toString()); + params.set("collection", COLLECTION_NAME); + params.set("shard", SHARD1); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + NamedList cluster = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cluster); + NamedList collections = (NamedList) cluster.get("collections"); + assertNotNull("Collections should not be null in cluster state", collections); + assertNotNull(collections.get(COLLECTION_NAME)); + assertEquals(1, collections.size()); + Map collection = (Map) collections.get(COLLECTION_NAME); + Map shardStatus = (Map) collection.get("shards"); + assertEquals(1, shardStatus.size()); + Map selectedShardStatus = (Map) shardStatus.get(SHARD1); + assertNotNull(selectedShardStatus); + + } finally { + //remove collections + client.shutdown(); + } + } + + + private void listCollection() throws IOException, SolrServerException { + CloudSolrServer client = createCloudClient(null); + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.LIST.toString()); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + List collections = (List) rsp.get("collections"); + assertTrue("control_collection was not found in list", collections.contains("control_collection")); + assertTrue(DEFAULT_COLLECTION + " was not found in list", collections.contains(DEFAULT_COLLECTION)); + assertTrue(COLLECTION_NAME + " was not found in list", collections.contains(COLLECTION_NAME)); 
+ assertTrue(COLLECTION_NAME1 + " was not found in list", collections.contains(COLLECTION_NAME1)); + } finally { + //remove collections + client.shutdown(); + } + + + } + + private void clusterStatusNoCollection() throws Exception { + CloudSolrServer client = createCloudClient(null); + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CLUSTERSTATUS.toString()); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + NamedList cluster = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cluster); + NamedList collections = (NamedList) cluster.get("collections"); + assertNotNull("Collections should not be null in cluster state", collections); + assertNotNull(collections.get(COLLECTION_NAME1)); + assertEquals(4, collections.size()); + + } finally { + //remove collections + client.shutdown(); + } + + } + + private void clusterStatusWithCollection() throws IOException, SolrServerException { + CloudSolrServer client = createCloudClient(null); + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CLUSTERSTATUS.toString()); + params.set("collection", COLLECTION_NAME); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + NamedList cluster = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cluster); + NamedList collections = (NamedList) cluster.get("collections"); + assertNotNull("Collections should not be null in cluster state", collections); + assertNotNull(collections.get(COLLECTION_NAME)); + assertEquals(1, collections.size()); + } finally { + //remove collections + client.shutdown(); + } + } + + private void clusterStatusWithRouteKey() throws IOException, SolrServerException { + CloudSolrServer client = createCloudClient(DEFAULT_COLLECTION); + try { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "a!123"); // goes to shard2. 
see ShardRoutingTest for details + client.add(doc); + client.commit(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CLUSTERSTATUS.toString()); + params.set("collection", DEFAULT_COLLECTION); + params.set(ShardParams._ROUTE_, "a!"); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + NamedList cluster = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cluster); + NamedList collections = (NamedList) cluster.get("collections"); + assertNotNull("Collections should not be null in cluster state", collections); + assertNotNull(collections.get(DEFAULT_COLLECTION)); + assertEquals(1, collections.size()); + Map collection = (Map) collections.get(DEFAULT_COLLECTION); + Map shardStatus = (Map) collection.get("shards"); + assertEquals(1, shardStatus.size()); + Map selectedShardStatus = (Map) shardStatus.get(SHARD2); + assertNotNull(selectedShardStatus); + } finally { + //remove collections + client.shutdown(); + } + } + + private void clusterStatusAliasTest() throws Exception { + CloudSolrServer client = createCloudClient(null); + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CREATEALIAS.toString()); + params.set("name", "myalias"); + params.set("collections", DEFAULT_COLLECTION + "," + COLLECTION_NAME); + SolrRequest request = new QueryRequest(params); + request.setPath("/admin/collections"); + client.request(request); + params = new ModifiableSolrParams(); + params.set("action", CollectionParams.CollectionAction.CLUSTERSTATUS.toString()); + params.set("collection", DEFAULT_COLLECTION); + request = new QueryRequest(params); + request.setPath("/admin/collections"); + + NamedList rsp = client.request(request); + + + NamedList cluster = (NamedList) rsp.get("cluster"); + assertNotNull("Cluster state should not be null", cluster); + Map aliases = (Map) cluster.get("aliases"); + assertNotNull("Aliases should not be null", aliases); + assertEquals("Alias: myalias not found in cluster status", + DEFAULT_COLLECTION + "," + COLLECTION_NAME, aliases.get("myalias")); + + NamedList collections = (NamedList) cluster.get("collections"); + assertNotNull("Collections should not be null in cluster state", collections); + assertNotNull(collections.get(DEFAULT_COLLECTION)); + Map collection = (Map) collections.get(DEFAULT_COLLECTION); + List collAlias = (List) collection.get("aliases"); + assertEquals("Aliases not found", Lists.newArrayList("myalias"), collAlias); + } finally { + //remove collections + client.shutdown(); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestModifyConfFiles.java b/solr/core/src/test/org/apache/solr/cloud/TestModifyConfFiles.java index d92e3e87c04..c572a7edf51 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestModifyConfFiles.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestModifyConfFiles.java @@ -26,6 +26,7 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import java.io.File; +import java.nio.charset.StandardCharsets; public class TestModifyConfFiles extends AbstractFullDistribZkTestBase { @@ -93,7 +94,7 @@ public class TestModifyConfFiles extends AbstractFullDistribZkTestBase { client.request(request); SolrZkClient zkClient = cloudClient.getZkStateReader().getZkClient(); - String contents = new 
String(zkClient.getData("/configs/conf1/schema.xml", null, null, true), "UTF-8"); + String contents = new String(zkClient.getData("/configs/conf1/schema.xml", null, null, true), StandardCharsets.UTF_8); assertTrue("Schema contents should have changed!", contents.contains("")); @@ -107,7 +108,7 @@ public class TestModifyConfFiles extends AbstractFullDistribZkTestBase { client.request(request); - contents = new String(zkClient.getData("/configs/conf1/velocity/test.vm", null, null, true), "UTF-8"); + contents = new String(zkClient.getData("/configs/conf1/velocity/test.vm", null, null, true), StandardCharsets.UTF_8); assertTrue("Should have found new content in a velocity/test.vm.", contents.indexOf("Some bogus stuff for a test.") != -1); diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java index 08bbd3e02bb..26f9894a566 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.List; @@ -156,7 +157,7 @@ public class ZkCLITest extends SolrTestCaseJ4 { zkClient.getData("/data.txt", null, null, true); - assertArrayEquals(zkClient.getData("/data.txt", null, null, true), data.getBytes("UTF-8")); + assertArrayEquals(zkClient.getData("/data.txt", null, null, true), data.getBytes(StandardCharsets.UTF_8)); } @Test @@ -166,12 +167,12 @@ public class ZkCLITest extends SolrTestCaseJ4 { "putfile", "/solr.xml", SOLR_HOME + File.separator + "solr-stress-new.xml"}; ZkCLI.main(args); - String fromZk = new String(zkClient.getData("/solr.xml", null, null, true), "UTF-8"); + String fromZk = new String(zkClient.getData("/solr.xml", null, null, true), StandardCharsets.UTF_8); File locFile = new File(SOLR_HOME + File.separator + "solr-stress-new.xml"); InputStream is = new FileInputStream(locFile); String fromLoc; try { - fromLoc = new String(IOUtils.toByteArray(is), "UTF-8"); + fromLoc = new String(IOUtils.toByteArray(is), StandardCharsets.UTF_8); } finally { IOUtils.closeQuietly(is); } @@ -268,7 +269,7 @@ public class ZkCLITest extends SolrTestCaseJ4 { @Test public void testGet() throws Exception { String getNode = "/getNode"; - byte [] data = new String("getNode-data").getBytes("UTF-8"); + byte [] data = new String("getNode-data").getBytes(StandardCharsets.UTF_8); this.zkClient.create(getNode, data, CreateMode.PERSISTENT, true); String[] args = new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "get", getNode}; @@ -280,7 +281,7 @@ public class ZkCLITest extends SolrTestCaseJ4 { File tmpDir = createTempDir(); String getNode = "/getFileNode"; - byte [] data = new String("getFileNode-data").getBytes("UTF-8"); + byte [] data = new String("getFileNode-data").getBytes(StandardCharsets.UTF_8); this.zkClient.create(getNode, data, CreateMode.PERSISTENT, true); File file = new File(tmpDir, diff --git a/solr/core/src/test/org/apache/solr/core/CoreContainerCoreInitFailuresTest.java b/solr/core/src/test/org/apache/solr/core/CoreContainerCoreInitFailuresTest.java index 1ca5f2e0895..0b1b77b5328 100644 --- a/solr/core/src/test/org/apache/solr/core/CoreContainerCoreInitFailuresTest.java +++ b/solr/core/src/test/org/apache/solr/core/CoreContainerCoreInitFailuresTest.java @@ -58,7 +58,7 @@ public class CoreContainerCoreInitFailuresTest extends SolrTestCaseJ4 
{ // solr.xml File solrXml = new File(solrHome, "solr.xml"); - FileUtils.write(solrXml, EMPTY_SOLR_XML, IOUtils.CHARSET_UTF_8.toString()); + FileUtils.write(solrXml, EMPTY_SOLR_XML, IOUtils.UTF_8); // ---- // init the CoreContainer @@ -133,7 +133,7 @@ public class CoreContainerCoreInitFailuresTest extends SolrTestCaseJ4 { // start with two collections: one valid, and one broken File solrXml = new File(solrHome, "solr.xml"); - FileUtils.write(solrXml, BAD_SOLR_XML, IOUtils.CHARSET_UTF_8.toString()); + FileUtils.write(solrXml, BAD_SOLR_XML, IOUtils.UTF_8); // our "ok" collection FileUtils.copyFile(getFile("solr/collection1/conf/solrconfig-defaults.xml"), @@ -272,7 +272,7 @@ public class CoreContainerCoreInitFailuresTest extends SolrTestCaseJ4 { FileUtils.write (FileUtils.getFile(solrHome, "col_bad", "conf", "solrconfig.xml"), "This is giberish, not valid XML <", - IOUtils.CHARSET_UTF_8.toString()); + IOUtils.UTF_8); try { ignoreException(Pattern.quote("SAX")); diff --git a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java index 542af843db7..3d3f00bb5dd 100644 --- a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java +++ b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java @@ -21,6 +21,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.Properties; import javax.xml.parsers.ParserConfigurationException; @@ -101,7 +102,7 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ p.put("index", newDir.getName()); Writer os = null; try { - os = new OutputStreamWriter(new FileOutputStream(idxprops), IOUtils.CHARSET_UTF_8); + os = new OutputStreamWriter(new FileOutputStream(idxprops), StandardCharsets.UTF_8); p.store(os, "index properties"); } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, diff --git a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java index 65f872fd2cc..3f83e61cf5b 100644 --- a/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java +++ b/solr/core/src/test/org/apache/solr/core/TestCoreContainer.java @@ -25,6 +25,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.jar.JarEntry; @@ -33,7 +34,6 @@ import java.util.jar.JarOutputStream; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.io.FileUtils; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.handler.admin.CollectionsHandler; @@ -44,6 +44,7 @@ import org.junit.BeforeClass; import org.junit.Test; import org.xml.sax.SAXException; + public class TestCoreContainer extends SolrTestCaseJ4 { private static String oldSolrHome; @@ -192,15 +193,22 @@ public class TestCoreContainer extends SolrTestCaseJ4 { assertNotNull(h.getCoreContainer().getLogging()); } - private void SetUpHome(File solrHomeDirectory, String xmlFile) - throws IOException { - File solrXmlFile = new File(solrHomeDirectory, "solr.xml"); - BufferedWriter out = new BufferedWriter(new OutputStreamWriter( - new FileOutputStream(solrXmlFile), IOUtils.CHARSET_UTF_8)); - out.write(xmlFile); - out.close(); - - 
// init + private void SetUpHome(File solrHomeDirectory, String xmlFile) throws IOException { + if (solrHomeDirectory.exists()) { + FileUtils.deleteDirectory(solrHomeDirectory); + } + assertTrue("Failed to mkdirs workDir", solrHomeDirectory.mkdirs()); + try { + File solrXmlFile = new File(solrHomeDirectory, "solr.xml"); + BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(solrXmlFile), StandardCharsets.UTF_8)); + out.write(xmlFile); + out.close(); + } catch (IOException e) { + FileUtils.deleteDirectory(solrHomeDirectory); + throw e; + } + + //init System.setProperty(SOLR_HOME_PROP, solrHomeDirectory.getAbsolutePath()); } diff --git a/solr/core/src/test/org/apache/solr/core/TestCoreDiscovery.java b/solr/core/src/test/org/apache/solr/core/TestCoreDiscovery.java index f7375fc5329..60d2456fbcc 100644 --- a/solr/core/src/test/org/apache/solr/core/TestCoreDiscovery.java +++ b/solr/core/src/test/org/apache/solr/core/TestCoreDiscovery.java @@ -31,6 +31,8 @@ import org.junit.After; import org.junit.BeforeClass; import org.junit.Test; +import java.nio.charset.StandardCharsets; + public class TestCoreDiscovery extends SolrTestCaseJ4 { @BeforeClass @@ -47,7 +49,7 @@ public class TestCoreDiscovery extends SolrTestCaseJ4 { xmlStr = xmlStr.replace("", " " + alternateCoreDir + " "); } File tmpFile = new File(solrHomeDirectory, ConfigSolr.SOLR_XML_FILE); - FileUtils.write(tmpFile, xmlStr, IOUtils.CHARSET_UTF_8.toString()); + FileUtils.write(tmpFile, xmlStr, IOUtils.UTF_8); } @@ -75,7 +77,7 @@ public class TestCoreDiscovery extends SolrTestCaseJ4 { private void addCoreWithProps(Properties stockProps, File propFile) throws Exception { if (!propFile.getParentFile().exists()) propFile.getParentFile().mkdirs(); - Writer out = new OutputStreamWriter(new FileOutputStream(propFile), IOUtils.CHARSET_UTF_8); + Writer out = new OutputStreamWriter(new FileOutputStream(propFile), StandardCharsets.UTF_8); try { stockProps.store(out, null); } finally { diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrXMLSerializer.java b/solr/core/src/test/org/apache/solr/core/TestSolrXMLSerializer.java index 57c427399ae..056f9745b46 100644 --- a/solr/core/src/test/org/apache/solr/core/TestSolrXMLSerializer.java +++ b/solr/core/src/test/org/apache/solr/core/TestSolrXMLSerializer.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -72,21 +73,21 @@ public class TestSolrXMLSerializer extends SolrTestCaseJ4 { sharedLibVal, adminPathKey, adminPathVal, shareSchemaKey, shareSchemaVal, instanceDirKey, instanceDirVal); - Writer w = new StringWriter(); + StringWriter w = new StringWriter(); try { serializer.persist(w, solrXMLDef); } finally { w.close(); } - assertResults(((StringWriter) w).getBuffer().toString().getBytes("UTF-8")); + assertResults(w.toString().getBytes(StandardCharsets.UTF_8)); // again with default file File tmpFile = TestUtil.createTempFile("solr.xml", null, createTempDir()); serializer.persistFile(tmpFile, solrXMLDef); - assertResults(FileUtils.readFileToString(tmpFile, "UTF-8").getBytes("UTF-8")); + assertResults(FileUtils.readFileToByteArray(tmpFile)); tmpFile.delete(); } diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrXmlPersistence.java b/solr/core/src/test/org/apache/solr/core/TestSolrXmlPersistence.java index 683ce99cae6..f9a5dad4fc2 100644 --- 
a/solr/core/src/test/org/apache/solr/core/TestSolrXmlPersistence.java +++ b/solr/core/src/test/org/apache/solr/core/TestSolrXmlPersistence.java @@ -31,6 +31,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.lucene.util.IOUtils; @@ -52,8 +53,8 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; -import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import com.google.common.base.Charsets; +import java.nio.charset.StandardCharsets; public class TestSolrXmlPersistence extends SolrTestCaseJ4 { @@ -75,7 +76,7 @@ public class TestSolrXmlPersistence extends SolrTestCaseJ4 { } File solrXml = new File(solrHomeDirectory, "solr.xml"); - FileUtils.write(solrXml, solrXmlString, IOUtils.CHARSET_UTF_8.toString()); + FileUtils.write(solrXml, solrXmlString, IOUtils.UTF_8); final CoreContainer cores = createCoreContainer(solrHomeDirectory.getAbsolutePath(), solrXmlString); return cores; @@ -401,7 +402,7 @@ public class TestSolrXmlPersistence extends SolrTestCaseJ4 { String defXml = FileUtils.readFileToString( new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml"), - Charsets.UTF_8.toString()); + StandardCharsets.UTF_8.name()); final CoreContainer cores = init(defXml, "collection1"); SolrXMLCoresLocator.NonPersistingLocator locator = (SolrXMLCoresLocator.NonPersistingLocator) cores.getCoresLocator(); @@ -504,7 +505,7 @@ public class TestSolrXmlPersistence extends SolrTestCaseJ4 { } private String[] getAllNodes(String xmlString) throws ParserConfigurationException, IOException, SAXException { - return getAllNodes(new ByteArrayInputStream(xmlString.getBytes(Charsets.UTF_8))); + return getAllNodes(new ByteArrayInputStream(xmlString.getBytes(StandardCharsets.UTF_8))); } /* diff --git a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java index 5d909131449..c68fb718d79 100644 --- a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java @@ -37,6 +37,7 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.IOException; import java.io.Reader; +import java.nio.charset.StandardCharsets; /** * A test for {@link DocumentAnalysisRequestHandler}. 
@@ -145,7 +146,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe " Müller\r\n" + " " + "" - ).getBytes("ISO-8859-1"); + ).getBytes(StandardCharsets.ISO_8859_1); // we declare a content stream without charset: final ContentStream cs = new ByteStream(xmlBytes, "application/xml"); @@ -177,7 +178,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe " Müller\r\n" + " " + "" - ).getBytes("ISO-8859-1"); + ).getBytes(StandardCharsets.ISO_8859_1); // we declare a content stream with charset: final ContentStream cs = new ByteStream(xmlBytes, "application/xml; charset=ISO-8859-1"); diff --git a/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java b/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java index 2eb1b066a29..3f9c715147b 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java +++ b/solr/core/src/test/org/apache/solr/handler/TestCSVLoader.java @@ -29,6 +29,7 @@ import org.junit.BeforeClass; import org.junit.Test; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.ArrayList; @@ -41,7 +42,6 @@ public class TestCSVLoader extends SolrTestCaseJ4 { } String filename; - String def_charset = "UTF-8"; File file; @Override @@ -66,12 +66,8 @@ public class TestCSVLoader extends SolrTestCaseJ4 { } void makeFile(String contents) { - makeFile(contents,def_charset); - } - - void makeFile(String contents, String charset) { try { - Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset); + Writer out = new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8); out.write(contents); out.close(); } catch (Exception e) { diff --git a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java index 9e6f99c3413..08e20773aed 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java +++ b/solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java @@ -28,6 +28,7 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -1473,8 +1474,8 @@ public class TestReplicationHandler extends SolrTestCaseJ4 { * character copy of file using UTF-8. If port is non-null, will be substituted any time "TEST_PORT" is found. 
*/ private static void copyFile(File src, File dst, Integer port, boolean internalCompression) throws IOException { - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(src), "UTF-8")); - Writer out = new OutputStreamWriter(new FileOutputStream(dst), "UTF-8"); + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(src), StandardCharsets.UTF_8)); + Writer out = new OutputStreamWriter(new FileOutputStream(dst), StandardCharsets.UTF_8); for (String line = in.readLine(); null != line; line = in.readLine()) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/CoreAdminCreateDiscoverTest.java b/solr/core/src/test/org/apache/solr/handler/admin/CoreAdminCreateDiscoverTest.java index 8b97f7124ad..33347f9a01d 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/CoreAdminCreateDiscoverTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/CoreAdminCreateDiscoverTest.java @@ -24,7 +24,6 @@ import java.io.InputStreamReader; import java.util.Properties; import org.apache.commons.io.FileUtils; -import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CoreAdminParams; @@ -35,6 +34,8 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import java.nio.charset.StandardCharsets; + public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 { private static File solrHomeDirectory = null; @@ -108,7 +109,7 @@ public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 { File propFile = new File(solrHomeDirectory, coreSysProps + "/" + CorePropertiesLocator.PROPERTIES_FILENAME); FileInputStream is = new FileInputStream(propFile); try { - props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8)); + props.load(new InputStreamReader(is, StandardCharsets.UTF_8)); } finally { org.apache.commons.io.IOUtils.closeQuietly(is); } @@ -250,7 +251,7 @@ public class CoreAdminCreateDiscoverTest extends SolrTestCaseJ4 { File propFile = new File(solrHomeDirectory, coreNormal + "/" + CorePropertiesLocator.PROPERTIES_FILENAME); FileInputStream is = new FileInputStream(propFile); try { - props.load(new InputStreamReader(is, IOUtils.CHARSET_UTF_8)); + props.load(new InputStreamReader(is, StandardCharsets.UTF_8)); } finally { org.apache.commons.io.IOUtils.closeQuietly(is); } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java index d328bb82389..9abf451c5b9 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedExpandComponentTest.java @@ -88,6 +88,9 @@ public class DistributedExpandComponentTest extends BaseDistributedSearchTestCas query("q", "test_ti:5", "fq", "{!collapse field=group_s}", "defType", "edismax", "bf", "field(test_ti)", "expand", "true", "expand.sort", "test_tl desc", "expand.rows", "1", "fl","*,score"); //Test zero results query("q", "test_ti:5434343", "fq", "{!collapse field=group_s}", "defType", "edismax", "bf", "field(test_ti)", "expand", "true", "expand.sort", "test_tl desc", "expand.rows", "1", "fl","*,score"); + //Test page 2 + query("q", "*:*", "start","1", "rows", "1", "fq", "{!collapse field=group_s}", "defType", "edismax", "bf", "field(test_ti)", "expand", "true", "fl","*,score"); + //First basic test case. 
ModifiableSolrParams params = new ModifiableSolrParams(); diff --git a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java index f9da1725f8d..a26ee20d7b5 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java @@ -19,7 +19,6 @@ package org.apache.solr.handler.component; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; @@ -38,6 +37,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; @@ -674,7 +674,7 @@ public class QueryElevationComponentTest extends SolrTestCaseJ4 { // write a test file to boost some docs private void writeFile(File file, String query, String... ids) throws Exception { - PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), IOUtils.CHARSET_UTF_8)); + PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8)); out.println(""); out.println(""); out.println(""); diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java b/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java index 792403e96ff..ea55c78c187 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java +++ b/solr/core/src/test/org/apache/solr/handler/component/TestExpandComponent.java @@ -45,10 +45,10 @@ public class TestExpandComponent extends SolrTestCaseJ4 { @Test public void testExpand() throws Exception { - String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"}; + String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000", "type_s", "parent"}; assertU(adoc(doc)); assertU(commit()); - String[] doc1 = {"id","2", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200"}; + String[] doc1 = {"id","2", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200", "type_s", "child"}; assertU(adoc(doc1)); String[] doc2 = {"id","3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200"}; @@ -58,23 +58,21 @@ public class TestExpandComponent extends SolrTestCaseJ4 { assertU(adoc(doc3)); - String[] doc4 = {"id","5", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", "2000"}; + String[] doc4 = {"id","5", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", "2000", "type_s", "parent"}; assertU(adoc(doc4)); assertU(commit()); - String[] doc5 = {"id","6", "term_s","YYYY", "group_s", "group2", "test_ti", "10", "test_tl", "100", "test_tf", "200"}; + String[] doc5 = {"id","6", "term_s","YYYY", "group_s", "group2", "test_ti", "10", "test_tl", "100", "test_tf", "200", "type_s", "child"}; assertU(adoc(doc5)); assertU(commit()); - String[] doc6 = {"id","7", "term_s", "YYYY", "group_s", "group1", "test_ti", "1", "test_tl", "100000", "test_tf", "2000"}; + String[] doc6 = {"id","7", "term_s", 
"YYYY", "group_s", "group1", "test_ti", "1", "test_tl", "100000", "test_tf", "2000", "type_s", "child"}; assertU(adoc(doc6)); assertU(commit()); - String[] doc7 = {"id","8", "term_s","YYYY", "group_s", "group2", "test_ti", "2", "test_tl", "100000", "test_tf", "200"}; + String[] doc7 = {"id","8", "term_s","YYYY", "group_s", "group2", "test_ti", "2", "test_tl", "100000", "test_tf", "200", "type_s", "child"}; assertU(adoc(doc7)); assertU(commit()); - - //First basic test case. ModifiableSolrParams params = new ModifiableSolrParams(); params.add("q", "*:*"); @@ -92,6 +90,23 @@ public class TestExpandComponent extends SolrTestCaseJ4 { "/response/lst[@name='expanded']/result[@name='group2']/doc[2]/float[@name='id'][.='8.0']" ); + //Basic test case page 2 + + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "{!collapse field=group_s}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("expand", "true"); + params.add("rows", "1"); + params.add("start", "1"); + assertQ(req(params), "*[count(/response/result/doc)=1]", + "*[count(/response/lst[@name='expanded']/result)=1]", + "/response/result/doc[1]/float[@name='id'][.='6.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[1]/float[@name='id'][.='5.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[2]/float[@name='id'][.='8.0']" + ); + //Test expand.sort params = new ModifiableSolrParams(); params.add("q", "*:*"); @@ -131,6 +146,70 @@ public class TestExpandComponent extends SolrTestCaseJ4 { ); + //Test overide expand.q + + params = new ModifiableSolrParams(); + params.add("q", "type_s:parent"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("expand", "true"); + params.add("expand.q", "type_s:child"); + params.add("expand.field", "group_s"); + params.add("expand.sort", "test_tl desc"); + assertQ(req(params), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "/response/result/doc[1]/float[@name='id'][.='1.0']", + "/response/result/doc[2]/float[@name='id'][.='5.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[1]/float[@name='id'][.='7.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[2]/float[@name='id'][.='2.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[1]/float[@name='id'][.='8.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[2]/float[@name='id'][.='6.0']" + ); + + + //Test overide expand.fq + + params = new ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "type_s:parent"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("expand", "true"); + params.add("expand.fq", "type_s:child"); + params.add("expand.field", "group_s"); + params.add("expand.sort", "test_tl desc"); + assertQ(req(params), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "/response/result/doc[1]/float[@name='id'][.='1.0']", + "/response/result/doc[2]/float[@name='id'][.='5.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[1]/float[@name='id'][.='7.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[2]/float[@name='id'][.='2.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[1]/float[@name='id'][.='8.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[2]/float[@name='id'][.='6.0']" + ); + + //Test overide expand.fq and expand.q + + params = new 
ModifiableSolrParams(); + params.add("q", "*:*"); + params.add("fq", "type_s:parent"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("expand", "true"); + params.add("expand.q", "type_s:child"); + params.add("expand.fq", "*:*"); + params.add("expand.field", "group_s"); + params.add("expand.sort", "test_tl desc"); + assertQ(req(params), "*[count(/response/result/doc)=2]", + "*[count(/response/lst[@name='expanded']/result)=2]", + "/response/result/doc[1]/float[@name='id'][.='1.0']", + "/response/result/doc[2]/float[@name='id'][.='5.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[1]/float[@name='id'][.='7.0']", + "/response/lst[@name='expanded']/result[@name='group1']/doc[2]/float[@name='id'][.='2.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[1]/float[@name='id'][.='8.0']", + "/response/lst[@name='expanded']/result[@name='group2']/doc[2]/float[@name='id'][.='6.0']" + ); + //Test expand.rows params = new ModifiableSolrParams(); @@ -179,15 +258,6 @@ public class TestExpandComponent extends SolrTestCaseJ4 { assertQ(req(params), "*[count(/response/result/doc)=0]", "*[count(/response/lst[@name='expanded']/result)=0]" ); - - - - - - } - - - } diff --git a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java index 102533930c8..ec41fe1a47a 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java @@ -24,7 +24,6 @@ import org.apache.solr.schema.IndexSchema; import org.junit.BeforeClass; /** simple tests for PostingsSolrHighlighter */ -@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"}) public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 { @BeforeClass diff --git a/solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java b/solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java index 37ad252cfc0..9135bc40533 100644 --- a/solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java +++ b/solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java @@ -19,6 +19,7 @@ package org.apache.solr.internal.csv.writer; import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; import junit.framework.TestCase; @@ -57,7 +58,7 @@ public class CSVConfigGuesserTest extends TestCase { StringBuilder sb = new StringBuilder(); sb.append("1234;abcd;1234\n"); sb.append("abcd;1234;abcd"); - ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); + ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8)); CSVConfigGuesser guesser = new CSVConfigGuesser(in); CSVConfig guessed = guesser.guess(); assertEquals(expected.isFixedWidth(), guessed.isFixedWidth()); @@ -80,7 +81,7 @@ public class CSVConfigGuesserTest extends TestCase { StringBuilder sb = new StringBuilder(); sb.append("1,2,3,4\n"); sb.append("abcd,1234,abcd,1234"); - ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); + ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8)); CSVConfigGuesser guesser = new CSVConfigGuesser(in); CSVConfig guessed = guesser.guess(); assertEquals(expected.isFixedWidth(), guessed.isFixedWidth()); diff --git 
a/solr/core/src/test/org/apache/solr/request/JSONWriterTest.java b/solr/core/src/test/org/apache/solr/request/JSONWriterTest.java index 84350beb237..9d2ca2ee1c9 100644 --- a/solr/core/src/test/org/apache/solr/request/JSONWriterTest.java +++ b/solr/core/src/test/org/apache/solr/request/JSONWriterTest.java @@ -19,6 +19,7 @@ package org.apache.solr.request; import java.io.IOException; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; @@ -92,7 +93,7 @@ public class JSONWriterTest extends SolrTestCaseJ4 { rsp.add("byte", Byte.valueOf((byte)-3)); rsp.add("short", Short.valueOf((short)-4)); - rsp.add("bytes", "abc".getBytes("UTF-8")); + rsp.add("bytes", "abc".getBytes(StandardCharsets.UTF_8)); w.write(buf, req, rsp); jsonEq("{\"nl\":[[\"data1\",\"he\\u2028llo\\u2029!\"],[null,42]],\"byte\":-3,\"short\":-4,\"bytes\":\"YWJj\"}", buf.toString()); diff --git a/solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java b/solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java index 14f88271e72..fce5d7871de 100644 --- a/solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java +++ b/solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java @@ -44,6 +44,7 @@ import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; /** * See SOLR-2854. @@ -99,7 +100,7 @@ public class TestRemoteStreaming extends SolrJettyTestBase { InputStream inputStream = (InputStream) obj; try { StringWriter strWriter = new StringWriter(); - IOUtils.copy(new InputStreamReader(inputStream, "UTF-8"),strWriter); + IOUtils.copy(new InputStreamReader(inputStream, StandardCharsets.UTF_8),strWriter); return strWriter.toString(); } finally { IOUtils.closeQuietly(inputStream); diff --git a/solr/core/src/test/org/apache/solr/request/TestWriterPerf.java b/solr/core/src/test/org/apache/solr/request/TestWriterPerf.java index d9a760f7f97..83b91491127 100644 --- a/solr/core/src/test/org/apache/solr/request/TestWriterPerf.java +++ b/solr/core/src/test/org/apache/solr/request/TestWriterPerf.java @@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import org.apache.solr.client.solrj.ResponseParser; @@ -119,7 +120,7 @@ public class TestWriterPerf extends AbstractSolrTestCase { out = new ByteArrayOutputStream(); // to be fair, from my previous tests, much of the performance will be sucked up // by java's UTF-8 encoding/decoding, not the actual writing - Writer writer = new OutputStreamWriter(out, "UTF-8"); + Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8); w.write(writer, req, rsp); writer.close(); } diff --git a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java new file mode 100644 index 00000000000..1c91ab9142e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymFilterFactory.java @@ -0,0 +1,178 @@ +package org.apache.solr.rest.schema.analysis; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.util.RestTestBase; +import org.eclipse.jetty.servlet.ServletHolder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.noggit.JSONUtil; +import org.restlet.ext.servlet.ServerServlet; + +public class TestManagedSynonymFilterFactory extends RestTestBase { + + private static File tmpSolrHome; + + /** + * Setup to make the schema mutable + */ + @Before + public void before() throws Exception { + tmpSolrHome = createTempDir(); + FileUtils.copyDirectory(new File(TEST_HOME()), tmpSolrHome.getAbsoluteFile()); + + final SortedMap extraServlets = new TreeMap<>(); + final ServletHolder solrRestApi = new ServletHolder("SolrSchemaRestApi", ServerServlet.class); + solrRestApi.setInitParameter("org.restlet.application", "org.apache.solr.rest.SolrSchemaRestApi"); + extraServlets.put(solrRestApi, "/schema/*"); + + System.setProperty("managed.schema.mutable", "true"); + System.setProperty("enable.update.log", "false"); + createJettyAndHarness(tmpSolrHome.getAbsolutePath(), "solrconfig-managed-schema.xml", "schema-rest.xml", + "/solr", true, extraServlets); + } + + @After + private void after() throws Exception { + jetty.stop(); + jetty = null; + FileUtils.deleteDirectory(tmpSolrHome); + System.clearProperty("managed.schema.mutable"); + System.clearProperty("enable.update.log"); + } + + @Test + public void testManagedSynonyms() throws Exception { + // this endpoint depends on at least one field type containing the following + // declaration in the schema-rest.xml: + // + // + // + String endpoint = "/schema/analysis/synonyms/english"; + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==false", + "/synonymMappings/managedMap=={}"); + + // put a new mapping into the synonyms + Map> syns = new HashMap<>(); + syns.put("happy", Arrays.asList("glad","cheerful","joyful")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/happy==['cheerful','glad','joyful']"); + + // request to a specific mapping + assertJQ(endpoint+"/happy", + "/happy==['cheerful','glad','joyful']"); + + // does not exist + assertJQ(endpoint+"/sad", + "/error/code==404"); + + // verify the user can update the ignoreCase initArg + assertJPut(endpoint, + json("{ 'initArgs':{ 'ignoreCase':true } }"), + "responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==true"); + + syns = new HashMap<>(); + syns.put("sad", Arrays.asList("unhappy")); + syns.put("SAD", Arrays.asList("Unhappy")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/sad==['unhappy']"); + + // verify delete works + 
assertJDelete(endpoint+"/sad", + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap=={'happy':['cheerful','glad','joyful']}"); + + // should fail with 404 as foo doesn't exist + assertJDelete(endpoint+"/foo", + "/error/code==404"); + + // verify that a newly added synonym gets expanded on the query side after core reload + + String newFieldName = "managed_en_field"; + // make sure the new field doesn't already exist + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 0", + "/response/lst[@name='responseHeader']/int[@name='status'] = '404'", + "/response/lst[@name='error']/int[@name='code'] = '404'"); + + // add the new field + assertJPut("/schema/fields/" + newFieldName, json("{'type':'managed_en'}"), + "/responseHeader/status==0"); + + // make sure the new field exists now + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 1", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'"); + + assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150")); + assertU(commit()); + + assertQ("/select?q=" + newFieldName + ":angry", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + + // add a mapping that will expand a query for "mad" to match docs with "angry" + syns = new HashMap<>(); + syns.put("mad", Arrays.asList("angry")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/mad==['angry']"); + + // should not match as the synonym mapping between mad and angry does not + // get applied until core reload + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='0']"); + + restTestHarness.reload(); + + // now query for mad and we should see our test doc + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + } +} diff --git a/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java b/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java index f7135f46302..f30b7ba96bb 100644 --- a/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/PreAnalyzedFieldTest.java @@ -72,7 +72,7 @@ public class PreAnalyzedFieldTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { - initCore("solrconfig.xml","schema.xml"); + initCore("solrconfig-minimal.xml","schema-preanalyzed.xml"); } @Override @@ -101,6 +101,12 @@ public class PreAnalyzedFieldTest extends SolrTestCaseJ4 { } } } + + @Test + public void testValidSimple2() { + assertU(adoc("id", "1", + "pre", "{\"v\":\"1\",\"str\":\"document one\",\"tokens\":[{\"t\":\"one\"},{\"t\":\"two\"},{\"t\":\"three\",\"i\":100}]}")); + } @Test public void testInvalidSimple() { diff --git a/solr/core/src/test/org/apache/solr/schema/SortableBinaryField.java b/solr/core/src/test/org/apache/solr/schema/SortableBinaryField.java index ea423abca59..a01dbd186bd 100644 --- a/solr/core/src/test/org/apache/solr/schema/SortableBinaryField.java +++ 
b/solr/core/src/test/org/apache/solr/schema/SortableBinaryField.java @@ -24,7 +24,6 @@ import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparatorSource; import org.apache.lucene.search.SortField; import org.apache.lucene.util.BytesRef; -import org.apache.solr.common.util.Base64; import java.io.IOException; import java.nio.ByteBuffer; @@ -81,20 +80,11 @@ public class SortableBinaryField extends BinaryField { @Override public Object marshalSortValue(Object value) { - if (null == value) { - return null; - } - final BytesRef val = (BytesRef)value; - return Base64.byteArrayToBase64(val.bytes, val.offset, val.length); + return marshalBase64SortValue(value); } @Override public Object unmarshalSortValue(Object value) { - if (null == value) { - return null; - } - final String val = (String)value; - final byte[] bytes = Base64.base64ToByteArray(val); - return new BytesRef(bytes); + return unmarshalBase64SortValue(value); } } diff --git a/solr/core/src/test/org/apache/solr/schema/TestCloudManagedSchema.java b/solr/core/src/test/org/apache/solr/schema/TestCloudManagedSchema.java index 6deaa45b526..15422d1727f 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestCloudManagedSchema.java +++ b/solr/core/src/test/org/apache/solr/schema/TestCloudManagedSchema.java @@ -29,6 +29,7 @@ import org.apache.zookeeper.KeeperException; import org.junit.BeforeClass; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.List; public class TestCloudManagedSchema extends AbstractFullDistribZkTestBase { @@ -89,14 +90,14 @@ public class TestCloudManagedSchema extends AbstractFullDistribZkTestBase { private String getFileContentFromZooKeeper(SolrZkClient zkClient, String fileName) throws IOException, SolrServerException, KeeperException, InterruptedException { - return (new String(zkClient.getData(fileName, null, null, true), "UTF-8")); + return (new String(zkClient.getData(fileName, null, null, true), StandardCharsets.UTF_8)); } protected final void assertFileNotInZooKeeper(SolrZkClient zkClient, String parent, String fileName) throws Exception { List kids = zkClient.getChildren(parent, null, true); for (String kid : kids) { if (kid.equalsIgnoreCase(fileName)) { - String rawContent = new String(zkClient.getData(fileName, null, null, true), "UTF-8"); + String rawContent = new String(zkClient.getData(fileName, null, null, true), StandardCharsets.UTF_8); fail("File '" + fileName + "' was unexpectedly found in ZooKeeper. 
Content starts with '" + rawContent.substring(0, 100) + " [...]'"); } diff --git a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java index 950d9366993..61799eb1522 100644 --- a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java +++ b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java @@ -17,10 +17,14 @@ package org.apache.solr.search; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.schema.DateField; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.request.SolrQueryRequest; @@ -28,11 +32,14 @@ import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.CursorPagingTest; import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START; +import java.io.IOException; import java.util.Arrays; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.Collection; import java.util.Collections; +import java.util.UUID; import org.junit.BeforeClass; @@ -51,7 +58,7 @@ public class CursorMarkTest extends SolrTestCaseJ4 { initCore(CursorPagingTest.TEST_SOLRCONFIG_NAME, CursorPagingTest.TEST_SCHEMAXML_NAME); } - public void testNextCursorMark() { + public void testNextCursorMark() throws IOException { final Collection allFieldNames = getAllFieldNames(); final SolrQueryRequest req = req(); final IndexSchema schema = req.getSchema(); @@ -113,7 +120,7 @@ public class CursorMarkTest extends SolrTestCaseJ4 { } - public void testGarbageParsing() { + public void testGarbageParsing() throws IOException { final SolrQueryRequest req = req(); final IndexSchema schema = req.getSchema(); final SortSpec ss = QueryParsing.parseSortSpec("str asc, float desc, id asc", req); @@ -160,7 +167,7 @@ public class CursorMarkTest extends SolrTestCaseJ4 { } } - public void testRoundTripParsing() { + public void testRoundTripParsing() throws IOException { // for any valid SortSpec, and any legal values, we should be able to round // trip serialize the totem and get the same values back. 
@@ -196,7 +203,7 @@ public class CursorMarkTest extends SolrTestCaseJ4 { } } - private static Object[] buildRandomSortObjects(SortSpec ss) { + private static Object[] buildRandomSortObjects(SortSpec ss) throws IOException { List fields = ss.getSchemaFields(); assertNotNull(fields); Object[] results = new Object[fields.size()]; @@ -225,14 +232,64 @@ public class CursorMarkTest extends SolrTestCaseJ4 { byte[] randBytes = new byte[TestUtil.nextInt(random(), 1, 50)]; random().nextBytes(randBytes); val = new BytesRef(randBytes); - } else if (fieldName.startsWith("int")) { - val = (Integer) random().nextInt(); - } else if (fieldName.startsWith("long")) { - val = (Long) random().nextLong(); - } else if (fieldName.startsWith("float")) { - val = (Float) random().nextFloat() * random().nextInt(); break; - } else if (fieldName.startsWith("double")) { - val = (Double) random().nextDouble() * random().nextInt(); break; + } else if (fieldName.startsWith("bcd")) { + if (fieldName.startsWith("bcd_long")) { // BCDLongField + val = Long.toString(random().nextLong()); + val = sf.getType().toInternal((String)val); + val = sf.getType().unmarshalSortValue(val); + } else { // BCDIntField & BCDStrField + val = Integer.toString(random().nextInt()); + val = sf.getType().toInternal((String)val); + val = sf.getType().unmarshalSortValue(val); + } + } else if (fieldName.contains("int")) { + val = random().nextInt(); // TrieIntField + if (fieldName.startsWith("legacy")) { // IntField + val = Integer.toString((Integer)val); + if (fieldName.startsWith("legacy_sortable")) { // SortableIntField + val = sf.getType().unmarshalSortValue(val); + } + } + } else if (fieldName.contains("long")) { + val = random().nextLong(); // TrieLongField + if (fieldName.startsWith("legacy")) { // LongField + val = Long.toString((Long)val); + if (fieldName.startsWith("legacy_sortable")) { // SortableLongField + val = sf.getType().unmarshalSortValue(val); + } + } + } else if (fieldName.contains("float")) { + val = random().nextFloat() * random().nextInt(); // TrieFloatField + if (fieldName.startsWith("legacy")) { // FloatField + val = Float.toString((Float)val); + if (fieldName.startsWith("legacy_sortable")) { // SortableFloatField + val = sf.getType().unmarshalSortValue(val); + } + } + } else if (fieldName.contains("double")) { + val = random().nextDouble() * random().nextInt(); // TrieDoubleField + if (fieldName.startsWith("legacy")) { // DoubleField + val = Double.toString((Double)val); + if (fieldName.startsWith("legacy_sortable")) { // SortableDoubleField + val = sf.getType().unmarshalSortValue(val); + } + } + } else if (fieldName.contains("date")) { + val = random().nextLong(); // TrieDateField + if (fieldName.startsWith("legacy_date")) { // DateField + val = ((DateField)sf.getType()).toInternal(new Date((Long)val)); + val = sf.getType().unmarshalSortValue(val); + } + } else if (fieldName.startsWith("currency")) { + val = random().nextDouble(); + } else if (fieldName.startsWith("uuid")) { + val = sf.getType().unmarshalSortValue(UUID.randomUUID().toString()); + } else if (fieldName.startsWith("bool")) { + val = sf.getType().unmarshalSortValue(random().nextBoolean() ? "t" : "f"); + } else if (fieldName.startsWith("enum")) { + val = random().nextInt(CursorPagingTest.SEVERITY_ENUM_VALUES.length); + } else if (fieldName.contains("collation")) { + val = getRandomCollation(sf); } else { fail("fell through the rabbit hole, new field in schema? 
= " + fieldName); } @@ -243,6 +300,22 @@ public class CursorMarkTest extends SolrTestCaseJ4 { } return results; } + + private static Object getRandomCollation(SchemaField sf) throws IOException { + Object val; + Analyzer analyzer = sf.getType().getAnalyzer(); + String term = TestUtil.randomRealisticUnicodeString(random()); + try (TokenStream ts = analyzer.tokenStream("fake", term)) { + TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); + val = termAtt.getBytesRef(); + ts.reset(); + assertTrue(ts.incrementToken()); + termAtt.fillBytesRef(); + assertFalse(ts.incrementToken()); + ts.end(); + } + return val; + } /** * a list of the fields in the schema - excluding _version_ diff --git a/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java index cf34e74e833..195ff656ca3 100644 --- a/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestCollapseQParserPlugin.java @@ -23,9 +23,11 @@ import org.apache.solr.common.params.ModifiableSolrParams; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import com.carrotsearch.hppc.IntOpenHashSet; import java.io.IOException; import java.util.*; +import java.util.Random; public class TestCollapseQParserPlugin extends SolrTestCaseJ4 { @@ -146,6 +148,51 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 { "//result/doc[3]/float[@name='id'][.='3.0']", "//result/doc[4]/float[@name='id'][.='6.0']"); + //Test SOLR-5773 with score collapse criteria + params = new ModifiableSolrParams(); + params.add("q", "YYYY"); + params.add("fq", "{!collapse field=group_s nullPolicy=collapse}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("qf", "term_s"); + params.add("qt", "/elevate"); + params.add("elevateIds", "1,5"); + assertQ(req(params), "*[count(//doc)=3]", + "//result/doc[1]/float[@name='id'][.='1.0']", + "//result/doc[2]/float[@name='id'][.='5.0']", + "//result/doc[3]/float[@name='id'][.='3.0']"); + + //Test SOLR-5773 with max field collapse criteria + params = new ModifiableSolrParams(); + params.add("q", "YYYY"); + params.add("fq", "{!collapse field=group_s min=test_ti nullPolicy=collapse}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("qf", "term_s"); + params.add("qt", "/elevate"); + params.add("elevateIds", "1,5"); + assertQ(req(params), "*[count(//doc)=3]", + "//result/doc[1]/float[@name='id'][.='1.0']", + "//result/doc[2]/float[@name='id'][.='5.0']", + "//result/doc[3]/float[@name='id'][.='4.0']"); + + + //Test SOLR-5773 elevating documents with null group + params = new ModifiableSolrParams(); + params.add("q", "YYYY"); + params.add("fq", "{!collapse field=group_s}"); + params.add("defType", "edismax"); + params.add("bf", "field(test_ti)"); + params.add("qf", "term_s"); + params.add("qt", "/elevate"); + params.add("elevateIds", "3,4"); + assertQ(req(params), "*[count(//doc)=4]", + "//result/doc[1]/float[@name='id'][.='3.0']", + "//result/doc[2]/float[@name='id'][.='4.0']", + "//result/doc[3]/float[@name='id'][.='2.0']", + "//result/doc[4]/float[@name='id'][.='6.0']"); + + //Test collapse by min int field and sort params = new ModifiableSolrParams(); diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java index 2bf35dca533..af588e11c33 100644 --- 
a/solr/core/src/test/org/apache/solr/search/TestDocSet.java +++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java @@ -418,6 +418,10 @@ public class TestDocSet extends LuceneTestCase { @Override public void document(int doc, StoredFieldVisitor visitor) { } + + @Override + public void checkIntegrity() throws IOException { + } }; } diff --git a/solr/core/src/test/org/apache/solr/search/TestRecovery.java b/solr/core/src/test/org/apache/solr/search/TestRecovery.java index 1b8bc633cd9..95f20171ab5 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRecovery.java +++ b/solr/core/src/test/org/apache/solr/search/TestRecovery.java @@ -33,6 +33,7 @@ import org.junit.Test; import java.io.File; import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.Arrays; import java.util.Deque; @@ -1030,9 +1031,9 @@ public class TestRecovery extends SolrTestCaseJ4 { raf.close(); // Now make a newer log file with just the IDs changed. NOTE: this may not work if log format changes too much! - findReplace("AAAAAA".getBytes("UTF-8"), "aaaaaa".getBytes("UTF-8"), content); - findReplace("BBBBBB".getBytes("UTF-8"), "bbbbbb".getBytes("UTF-8"), content); - findReplace("CCCCCC".getBytes("UTF-8"), "cccccc".getBytes("UTF-8"), content); + findReplace("AAAAAA".getBytes(StandardCharsets.UTF_8), "aaaaaa".getBytes(StandardCharsets.UTF_8), content); + findReplace("BBBBBB".getBytes(StandardCharsets.UTF_8), "bbbbbb".getBytes(StandardCharsets.UTF_8), content); + findReplace("CCCCCC".getBytes(StandardCharsets.UTF_8), "cccccc".getBytes(StandardCharsets.UTF_8), content); // WARNING... assumes format of .00000n where n is less than 9 long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1)); diff --git a/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java b/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java index 8386ab1ff86..dfe64bc0299 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java +++ b/solr/core/src/test/org/apache/solr/search/TestRecoveryHdfs.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.Arrays; import java.util.Deque; @@ -1028,9 +1029,9 @@ public class TestRecoveryHdfs extends SolrTestCaseJ4 { dis.close(); // Now make a newer log file with just the IDs changed. NOTE: this may not work if log format changes too much! - findReplace("AAAAAA".getBytes("UTF-8"), "aaaaaa".getBytes("UTF-8"), content); - findReplace("BBBBBB".getBytes("UTF-8"), "bbbbbb".getBytes("UTF-8"), content); - findReplace("CCCCCC".getBytes("UTF-8"), "cccccc".getBytes("UTF-8"), content); + findReplace("AAAAAA".getBytes(StandardCharsets.UTF_8), "aaaaaa".getBytes(StandardCharsets.UTF_8), content); + findReplace("BBBBBB".getBytes(StandardCharsets.UTF_8), "bbbbbb".getBytes(StandardCharsets.UTF_8), content); + findReplace("CCCCCC".getBytes(StandardCharsets.UTF_8), "cccccc".getBytes(StandardCharsets.UTF_8), content); // WARNING... 
assumes format of .00000n where n is less than 9 long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1)); diff --git a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java index 7f1bba79bbc..3952fb09703 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -28,9 +28,11 @@ import org.apache.solr.common.util.NamedList; import org.junit.BeforeClass; import org.junit.Test; import org.junit.Ignore; + import java.io.FileOutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -49,11 +51,12 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { String base = "external_foo_extf"; static long start = System.currentTimeMillis(); - void makeExternalFile(String field, String contents, String charset) { + + void makeExternalFile(String field, String contents) { String dir = h.getCore().getDataDir(); String filename = dir + "/external_" + field + "." + (start++); try { - Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset); + Writer out = new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8); out.write(contents); out.close(); } catch (Exception e) { @@ -219,7 +222,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { createIndex(null,ids); // Unsorted field, largest first - makeExternalFile(field, "54321=543210\n0=-999\n25=250","UTF-8"); + makeExternalFile(field, "54321=543210\n0=-999\n25=250"); // test identity (straight field value) singleTest(field, "\0", 54321, 543210, 0,-999, 25,250, 100, 1); Object orig = FileFloatSource.onlyForTesting; @@ -229,7 +232,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { singleTest(field, "sqrt(\0)"); assertTrue(orig == FileFloatSource.onlyForTesting); - makeExternalFile(field, "0=1","UTF-8"); + makeExternalFile(field, "0=1"); assertU(h.query("/reloadCache",lrf.makeRequest("",""))); singleTest(field, "sqrt(\0)"); assertTrue(orig != FileFloatSource.onlyForTesting); @@ -263,7 +266,7 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { for (int j=0; j getRanges(String id1, String id2) throws UnsupportedEncodingException { // find minHash/maxHash hash ranges - byte[] bytes = id1.getBytes("UTF-8"); + byte[] bytes = id1.getBytes(StandardCharsets.UTF_8); int minHash = Hash.murmurhash3_x86_32(bytes, 0, bytes.length, 0); - bytes = id2.getBytes("UTF-8"); + bytes = id2.getBytes(StandardCharsets.UTF_8); int maxHash = Hash.murmurhash3_x86_32(bytes, 0, bytes.length, 0); if (minHash > maxHash) { diff --git a/solr/core/src/test/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactoryTest.java new file mode 100644 index 00000000000..b4e8ca0bb79 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactoryTest.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrInputDocument; + +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorChain; +import org.apache.solr.update.processor.UpdateRequestProcessorFactory; +import org.apache.solr.update.UpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.update.DeleteUpdateCommand; + +import org.junit.BeforeClass; + +import java.util.Date; +import java.util.concurrent.TimeUnit; + +/** + * Tests various configurations of DocExpirationUpdateProcessorFactory + */ +public class DocExpirationUpdateProcessorFactoryTest extends UpdateProcessorTestBase { + + public static final String CONFIG_XML = "solrconfig-doc-expire-update-processor.xml"; + public static final String SCHEMA_XML = "schema15.xml"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore(CONFIG_XML, SCHEMA_XML); + } + + public void testTTLDefaultsConversion() throws Exception { + SolrInputDocument d = null; + + d = processAdd("convert-ttl-defaults", + params("NOW","1394059630042"), + doc(f("id", "1111"), + f("_ttl_","+5MINUTES"))); + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd("convert-ttl-defaults", + params("NOW","1394059630042", + "_ttl_","+5MINUTES"), + doc(f("id", "1111"))); + + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + } + + public void testTTLFieldConversion() throws Exception { + final String chain = "convert-ttl-field"; + SolrInputDocument d = null; + d = processAdd(chain, + params("NOW","1394059630042"), + doc(f("id", "1111"), + f("_ttl_field_","+5MINUTES"))); + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd(chain, + params("NOW","1394059630042"), + doc(f("id", "2222"), + f("_ttl_field_","+27MINUTES"))); + assertNotNull(d); + assertEquals(new Date(1394061250042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd(chain, + params("NOW","1394059630042"), + doc(f("id", "3333"), + f("_ttl_field_","+1YEAR"))); + assertNotNull(d); + assertEquals(new Date(1425595630042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd(chain, + params("NOW","1394059630042"), + doc(f("id", "1111"), + f("_ttl_field_","/DAY+1YEAR"))); + assertNotNull(d); + assertEquals(new Date(1425513600000L), d.getFieldValue("_expire_at_tdt")); + + // default ttlParamName is disabled, this should not convert... 
+ d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_","+5MINUTES"), + doc(f("id", "1111"))); + assertNotNull(d); + assertNull(d.getFieldValue("_expire_at_tdt")); + } + + public void testTTLParamConversion() throws Exception { + final String chain = "convert-ttl-param"; + SolrInputDocument d = null; + d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_param_","+5MINUTES"), + doc(f("id", "1111"))); + + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_param_","+27MINUTES"), + doc(f("id", "2222"))); + assertNotNull(d); + assertEquals(new Date(1394061250042L), d.getFieldValue("_expire_at_tdt")); + + // default ttlFieldName is disabled, param should be used... + d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_param_","+5MINUTES"), + doc(f("id", "1111"), + f("_ttl_field_","+999MINUTES"))); + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + + // default ttlFieldName is disabled, this should not convert... + d = processAdd(chain, + params("NOW","1394059630042"), + doc(f("id", "1111"), + f("_ttl_","/DAY+1YEAR"))); + assertNotNull(d); + assertNull(d.getFieldValue("_expire_at_tdt")); + } + + public void testTTLFieldConversionWithDefaultParam() throws Exception { + final String chain = "convert-ttl-field-with-param-default"; + SolrInputDocument d = null; + d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_param_","+999MINUTES"), + doc(f("id", "1111"), + f("_ttl_field_","+5MINUTES"))); + assertNotNull(d); + assertEquals(new Date(1394059930042L), d.getFieldValue("_expire_at_tdt")); + + d = processAdd(chain, + params("NOW","1394059630042", + "_ttl_param_","+27MINUTES"), + doc(f("id", "2222"))); + assertNotNull(d); + assertEquals(new Date(1394061250042L), d.getFieldValue("_expire_at_tdt")); + + } + + public void testAutomaticDeletes() throws Exception { + + // get a handle on our recorder + + UpdateRequestProcessorChain chain = + h.getCore().getUpdateProcessingChain("scheduled-delete"); + + assertNotNull(chain); + + UpdateRequestProcessorFactory[] factories = chain.getFactories(); + assertEquals("did number of processors configured in chain get changed?", + 5, factories.length); + assertTrue("Expected [1] RecordingUpdateProcessorFactory: " + factories[1].getClass(), + factories[1] instanceof RecordingUpdateProcessorFactory); + RecordingUpdateProcessorFactory recorder = + (RecordingUpdateProcessorFactory) factories[1]; + + // now start recording, and monitor for the expected commands + + try { + recorder.startRecording(); + + // more then one iter to verify it's recurring + final int numItersToCheck = 1 + RANDOM_MULTIPLIER; + + for (int i = 0; i < numItersToCheck; i++) { + UpdateCommand tmp; + + // be generous in how long we wait, some jenkins machines are slooooow + tmp = recorder.commandQueue.poll(30, TimeUnit.SECONDS); + + // we can be confident in the order because DocExpirationUpdateProcessorFactory + // uses the same request for both the delete & the commit -- and both + // RecordingUpdateProcessorFactory's getInstance & startRecording methods are + // synchronized. 
So it should not be possible to start recording in the + // middle of the two commands + assertTrue("expected DeleteUpdateCommand: " + tmp.getClass(), + tmp instanceof DeleteUpdateCommand); + + DeleteUpdateCommand delete = (DeleteUpdateCommand) tmp; + assertFalse(delete.isDeleteById()); + assertNotNull(delete.getQuery()); + assertTrue(delete.getQuery(), + delete.getQuery().startsWith("{!cache=false}eXpField_tdt:[* TO ")); + + // commit should be immediately after the delete + tmp = recorder.commandQueue.poll(5, TimeUnit.SECONDS); + assertTrue("expected CommitUpdateCommand: " + tmp.getClass(), + tmp instanceof CommitUpdateCommand); + + CommitUpdateCommand commit = (CommitUpdateCommand) tmp; + assertTrue(commit.softCommit); + assertTrue(commit.openSearcher); + } + } finally { + recorder.stopRecording(); + } + } + + +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/RecordingUpdateProcessorFactory.java b/solr/core/src/test/org/apache/solr/update/processor/RecordingUpdateProcessorFactory.java new file mode 100644 index 00000000000..add57ef3257 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/RecordingUpdateProcessorFactory.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.io.IOException; + +import org.apache.solr.common.SolrException; +import static org.apache.solr.common.SolrException.ErrorCode.*; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.UpdateCommand; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.apache.solr.update.DeleteUpdateCommand; +import org.apache.solr.update.MergeIndexesCommand; +import org.apache.solr.update.RollbackUpdateCommand; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * This Factory can optionally save references to the commands it receives in + * BlockingQueues that tests can poll from to observe that the expected commands + * are executed.
By default, this factory does nothing except return the "next" + * processor from the chain unless it's told to {@link #startRecording()} + */ +public final class RecordingUpdateProcessorFactory + extends UpdateRequestProcessorFactory { + + private boolean recording = false; + + /** The queue containing commands that were recorded + * @see #startRecording + */ + public final BlockingQueue commandQueue + = new LinkedBlockingQueue(); + + /** + * @see #stopRecording + * @see #commandQueue + */ + public synchronized void startRecording() { + recording = true; + } + + /** @see #startRecording */ + public synchronized void stopRecording() { + recording = false; + } + + @Override + public synchronized UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next ) { + return recording ? new RecordingUpdateRequestProcessor(commandQueue, next) : next; + } + + private static final class RecordingUpdateRequestProcessor + extends UpdateRequestProcessor { + + private final BlockingQueue commandQueue; + + public RecordingUpdateRequestProcessor(BlockingQueue commandQueue, + UpdateRequestProcessor next) { + super(next); + this.commandQueue = commandQueue; + } + + private void record(UpdateCommand cmd) { + if (! commandQueue.offer(cmd) ) { + throw new RuntimeException + ("WTF: commandQueue should be unbounded but offer failed: " + cmd.toString()); + } + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + record(cmd); + super.processAdd(cmd); + } + @Override + public void processDelete(DeleteUpdateCommand cmd) throws IOException { + record(cmd); + super.processDelete(cmd); + } + @Override + public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException { + record(cmd); + super.processMergeIndexes(cmd); + } + @Override + public void processCommit(CommitUpdateCommand cmd) throws IOException { + record(cmd); + super.processCommit(cmd); + } + @Override + public void processRollback(RollbackUpdateCommand cmd) throws IOException { + record(cmd); + super.processRollback(cmd); + } + } +} + + + diff --git a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java b/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java index 5ddff4555d0..8d849d7be9c 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java +++ b/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java @@ -24,6 +24,7 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; @@ -65,14 +66,19 @@ public class UpdateProcessorTestBase extends SolrTestCaseJ4 { SolrQueryRequest req = new LocalSolrQueryRequest(core, requestParams); try { + SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); AddUpdateCommand cmd = new AddUpdateCommand(req); cmd.solrDoc = docIn; UpdateRequestProcessor processor = pc.createProcessor(req, rsp); - processor.processAdd(cmd); + if (null != processor) { + // test chain might be empty or short circuted. 
+ processor.processAdd(cmd); + } return cmd.solrDoc; } finally { + SolrRequestInfo.clearRequestInfo(); req.close(); } } diff --git a/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java b/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java index 8655bd95c02..2b93cebd107 100644 --- a/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java +++ b/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; import java.util.Set; @@ -213,7 +214,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 { sb.append("Disallow: /disallow # Disallow this path\n"); sb.append("Disallow: /nonexistingpath # Disallow this path\n"); this.robotsCache.put("[ff01::114]", SimplePostTool.pageFetcher. - parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8")))); + parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8)))); } @Override @@ -225,11 +226,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 { } res.httpStatus = 200; res.contentType = "text/html"; - try { - res.content = htmlMap.get(u.toString()).getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(); - } + res.content = htmlMap.get(u.toString()).getBytes(StandardCharsets.UTF_8); return res; } diff --git a/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json b/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json new file mode 100644 index 00000000000..869bdce0514 --- /dev/null +++ b/solr/example/solr/collection1/conf/_schema_analysis_synonyms_english.json @@ -0,0 +1,11 @@ +{ + "initArgs":{ + "ignoreCase":true, + "format":"solr" + }, + "managedMap":{ + "GB":["GiB","Gigabyte"], + "happy":["glad","joyful"], + "TV":["Television"] + } +} diff --git a/solr/example/solr/collection1/conf/schema.xml b/solr/example/solr/collection1/conf/schema.xml index adaedfdbf8a..5504a0f00f1 100755 --- a/solr/example/solr/collection1/conf/schema.xml +++ b/solr/example/solr/collection1/conf/schema.xml @@ -453,6 +453,7 @@ + diff --git a/solr/licenses/junit4-ant-2.1.1.jar.sha1 b/solr/licenses/junit4-ant-2.1.1.jar.sha1 deleted file mode 100644 index 4340e4c8609..00000000000 --- a/solr/licenses/junit4-ant-2.1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a8a7371e11a8b3a4a3eeea81ad3cedafe3e3550e diff --git a/solr/licenses/junit4-ant-2.1.3.jar.sha1 b/solr/licenses/junit4-ant-2.1.3.jar.sha1 new file mode 100644 index 00000000000..c2d6fa49fa8 --- /dev/null +++ b/solr/licenses/junit4-ant-2.1.3.jar.sha1 @@ -0,0 +1 @@ +8636804644d4ae3874f0efaa98978887e171cd55 diff --git a/solr/licenses/randomizedtesting-runner-2.1.1.jar.sha1 b/solr/licenses/randomizedtesting-runner-2.1.1.jar.sha1 deleted file mode 100644 index 2923eedf9fe..00000000000 --- a/solr/licenses/randomizedtesting-runner-2.1.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5908c4e714dab40ccc892993a21537c7c0d6210c diff --git a/solr/licenses/randomizedtesting-runner-2.1.3.jar.sha1 b/solr/licenses/randomizedtesting-runner-2.1.3.jar.sha1 new file mode 100644 index 00000000000..5da2ec2946a --- /dev/null +++ b/solr/licenses/randomizedtesting-runner-2.1.3.jar.sha1 @@ -0,0 +1 @@ +d340caee99857ed0384681eea6219a4d937e7ee4 diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java 
b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java index dadc235773e..5a9af1e89d8 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java @@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.impl; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.Locale; import java.util.Queue; @@ -172,7 +173,7 @@ public class ConcurrentUpdateSolrServer extends SolrServer { public void writeTo(OutputStream out) throws IOException { try { if (isXml) { - out.write("".getBytes("UTF-8")); // can be anything + out.write("".getBytes(StandardCharsets.UTF_8)); // can be anything } UpdateRequest req = updateRequest; while (req != null) { @@ -197,7 +198,7 @@ public class ConcurrentUpdateSolrServer extends SolrServer { byte[] content = String.format(Locale.ROOT, fmt, params.getBool(UpdateParams.WAIT_SEARCHER, false) - + "").getBytes("UTF-8"); + + "").getBytes(StandardCharsets.UTF_8); out.write(content); } } @@ -206,7 +207,7 @@ public class ConcurrentUpdateSolrServer extends SolrServer { req = queue.poll(pollQueueTime, TimeUnit.MILLISECONDS); } if (isXml) { - out.write("".getBytes("UTF-8")); + out.write("".getBytes(StandardCharsets.UTF_8)); } } catch (InterruptedException e) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java index 040b20c829a..92b5ac22f50 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.net.ConnectException; import java.net.SocketTimeoutException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; import java.util.Iterator; @@ -78,7 +79,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class HttpSolrServer extends SolrServer { - private static final String UTF_8 = "UTF-8"; + private static final String UTF_8 = StandardCharsets.UTF_8.name(); private static final String DEFAULT_PATH = "/select"; private static final long serialVersionUID = -946812319974801896L; @@ -335,7 +336,7 @@ public class HttpSolrServer extends SolrServer { if (vals != null) { for (String v : vals) { if (isMultipart) { - parts.add(new FormBodyPart(p, new StringBody(v, Charset.forName("UTF-8")))); + parts.add(new FormBodyPart(p, new StringBody(v, StandardCharsets.UTF_8))); } else { postParams.add(new BasicNameValuePair(p, v)); } @@ -369,7 +370,7 @@ public class HttpSolrServer extends SolrServer { post.setEntity(entity); } else { //not using multipart - post.setEntity(new UrlEncodedFormEntity(postParams, "UTF-8")); + post.setEntity(new UrlEncodedFormEntity(postParams, StandardCharsets.UTF_8)); } method = post; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java index a1ebefe9ddd..858795719bf 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java @@ -298,85 +298,17 @@ public class LBHttpSolrServer extends SolrServer { rsp.server = serverStr; HttpSolrServer 
server = makeServer(serverStr); - try { - rsp.rsp = server.request(req.getRequest()); + ex = doRequest(server, req, rsp, isUpdate, false, null); + if (ex == null) { return rsp; // SUCCESS - } catch (SolrException e) { - // we retry on 404 or 403 or 503 or 500 - // unless it's an update - then we only retry on connect exceptions - if (!isUpdate && RETRY_CODES.contains(e.code())) { - ex = addZombie(server, e); - } else { - // Server is alive but the request was likely malformed or invalid - throw e; - } - } catch (SocketException e) { - if (!isUpdate || e instanceof ConnectException) { - ex = addZombie(server, e); - } else { - throw e; - } - } catch (SocketTimeoutException e) { - if (!isUpdate) { - ex = addZombie(server, e); - } else { - throw e; - } - } catch (SolrServerException e) { - Throwable rootCause = e.getRootCause(); - if (!isUpdate && rootCause instanceof IOException) { - ex = addZombie(server, e); - } else if (isUpdate && rootCause instanceof ConnectException) { - ex = addZombie(server, e); - } else { - throw e; - } - } catch (Exception e) { - throw new SolrServerException(e); } } // try the servers we previously skipped for (ServerWrapper wrapper : skipped) { - try { - rsp.rsp = wrapper.solrServer.request(req.getRequest()); - zombieServers.remove(wrapper.getKey()); - return rsp; // SUCCESS - } catch (SolrException e) { - // we retry on 404 or 403 or 503 or 500 - // unless it's an update - then we only retry on connect exceptions - if (!isUpdate && RETRY_CODES.contains(e.code())) { - ex = e; - // already a zombie, no need to re-add - } else { - // Server is alive but the request was malformed or invalid - zombieServers.remove(wrapper.getKey()); - throw e; - } - - } catch (SocketException e) { - if (!isUpdate || e instanceof ConnectException) { - ex = e; - } else { - throw e; - } - } catch (SocketTimeoutException e) { - if (!isUpdate) { - ex = e; - } else { - throw e; - } - } catch (SolrServerException e) { - Throwable rootCause = e.getRootCause(); - if (!isUpdate && rootCause instanceof IOException) { - ex = e; - } else if (isUpdate && rootCause instanceof ConnectException) { - ex = e; - } else { - throw e; - } - } catch (Exception e) { - throw new SolrServerException(e); + ex = doRequest(wrapper.solrServer, req, rsp, isUpdate, true, wrapper.getKey()); + if (ex == null) { + return rsp; // SUCCESS } } @@ -401,7 +333,53 @@ public class LBHttpSolrServer extends SolrServer { return e; } + protected Exception doRequest(HttpSolrServer server, Req req, Rsp rsp, boolean isUpdate, + boolean isZombie, String zombieKey) throws SolrServerException, IOException { + Exception ex = null; + try { + rsp.rsp = server.request(req.getRequest()); + if (isZombie) { + zombieServers.remove(zombieKey); + } + } catch (SolrException e) { + // we retry on 404 or 403 or 503 or 500 + // unless it's an update - then we only retry on connect exception + if (!isUpdate && RETRY_CODES.contains(e.code())) { + ex = (!isZombie) ? addZombie(server, e) : e; + } else { + // Server is alive but the request was likely malformed or invalid + if (isZombie) { + zombieServers.remove(zombieKey); + } + throw e; + } + } catch (SocketException e) { + if (!isUpdate || e instanceof ConnectException) { + ex = (!isZombie) ? addZombie(server, e) : e; + } else { + throw e; + } + } catch (SocketTimeoutException e) { + if (!isUpdate) { + ex = (!isZombie) ? 
addZombie(server, e) : e; + } else { + throw e; + } + } catch (SolrServerException e) { + Throwable rootCause = e.getRootCause(); + if (!isUpdate && rootCause instanceof IOException) { + ex = (!isZombie) ? addZombie(server, e) : e; + } else if (isUpdate && rootCause instanceof ConnectException) { + ex = (!isZombie) ? addZombie(server, e) : e; + } else { + throw e; + } + } catch (Exception e) { + throw new SolrServerException(e); + } + return ex; + } private void updateAliveList() { synchronized (aliveServers) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/AbstractUpdateRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/AbstractUpdateRequest.java index dce10455290..acfc525b33e 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/AbstractUpdateRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/AbstractUpdateRequest.java @@ -83,6 +83,12 @@ public abstract class AbstractUpdateRequest extends SolrRequest implements IsUpd return setAction(action, waitFlush, waitSearcher,maxSegments,false,expungeDeletes); } + public AbstractUpdateRequest setAction(ACTION action, boolean waitFlush, boolean waitSearcher, int maxSegments, boolean softCommit, boolean expungeDeletes, boolean openSearcher) { + setAction(action, waitFlush, waitSearcher, maxSegments, softCommit, expungeDeletes); + params.set(UpdateParams.OPEN_SEARCHER, String.valueOf(openSearcher)); + return this; + } + /** * @since Solr 1.4 */ diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java index 2990653c6ea..a33fe7e377a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java @@ -28,6 +28,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; /** * A RequestWriter is used to write requests to Solr. 
@@ -38,7 +39,7 @@ import java.nio.charset.Charset; * @since solr 1.4 */ public class RequestWriter { - public static final Charset UTF_8 = Charset.forName("UTF-8"); + public static final Charset UTF_8 = StandardCharsets.UTF_8; public Collection getContentStreams(SolrRequest req) throws IOException { if (req instanceof UpdateRequest) { diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java index a2873e0c9c4..d8ea26859ed 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.concurrent.atomic.AtomicLong; @@ -505,22 +506,17 @@ public class SolrZkClient { } string.append(dent + path + " (" + children.size() + ")" + NEWL); if (data != null) { - try { - String dataString = new String(data, "UTF-8"); - if ((!path.endsWith(".txt") && !path.endsWith(".xml")) || path.endsWith(ZkStateReader.CLUSTER_STATE)) { - if (path.endsWith(".xml")) { - // this is the cluster state in xml format - lets pretty print - dataString = prettyPrint(dataString); - } - - string.append(dent + "DATA:\n" + dent + " " - + dataString.replaceAll("\n", "\n" + dent + " ") + NEWL); - } else { - string.append(dent + "DATA: ...supressed..." + NEWL); + String dataString = new String(data, StandardCharsets.UTF_8); + if ((!path.endsWith(".txt") && !path.endsWith(".xml")) || path.endsWith(ZkStateReader.CLUSTER_STATE)) { + if (path.endsWith(".xml")) { + // this is the cluster state in xml format - lets pretty print + dataString = prettyPrint(dataString); } - } catch (UnsupportedEncodingException e) { - // can't happen - UTF-8 - throw new RuntimeException(e); + + string.append(dent + "DATA:\n" + dent + " " + + dataString.replaceAll("\n", "\n" + dent + " ") + NEWL); + } else { + string.append(dent + "DATA: ...supressed..." 
+ NEWL); } } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java index 71fb24ceda9..d8a50b35576 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java @@ -44,7 +44,9 @@ public interface CollectionParams CLUSTERPROP, REQUESTSTATUS, ADDREPLICA, - OVERSEERSTATUS; + OVERSEERSTATUS, + LIST, + CLUSTERSTATUS; public static CollectionAction get( String p ) { diff --git a/solr/solrj/src/java/org/apache/solr/common/params/ExpandParams.java b/solr/solrj/src/java/org/apache/solr/common/params/ExpandParams.java index 55f37335c15..a8f0cf7dc03 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/ExpandParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/ExpandParams.java @@ -25,7 +25,8 @@ public interface ExpandParams { public static final String EXPAND = "expand"; public static final String EXPAND_SORT = EXPAND + ".sort"; public static final String EXPAND_ROWS = EXPAND + ".rows"; - - + public static final String EXPAND_FIELD = EXPAND + ".field"; + public static final String EXPAND_Q = EXPAND + ".q"; + public static final String EXPAND_FQ = EXPAND + ".fq"; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java b/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java index d531b0faf33..34238b5f928 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java @@ -27,6 +27,7 @@ import java.io.Reader; import java.io.StringReader; import java.net.URL; import java.net.URLConnection; +import java.nio.charset.StandardCharsets; import java.util.Locale; /** @@ -37,7 +38,7 @@ import java.util.Locale; */ public abstract class ContentStreamBase implements ContentStream { - public static final String DEFAULT_CHARSET = "utf-8"; + public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name(); protected String name; protected String sourceInfo; diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrServerTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrServerTest.java index 3d469b0d672..3e45fa06ace 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrServerTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrServerTest.java @@ -238,9 +238,9 @@ public class CloudSolrServerTest extends AbstractFullDistribZkTestBase { // Calculate a number of shard keys that route to the same shard. 
int n; if (TEST_NIGHTLY) { - n = random().nextInt(999) + 1; + n = random().nextInt(999) + 2; } else { - n = random().nextInt(9) + 1; + n = random().nextInt(9) + 2; } List sameShardRoutes = Lists.newArrayList(); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java index 490d2f0870f..448d295358a 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java @@ -41,6 +41,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.List; /** @@ -102,7 +103,7 @@ public class NoOpResponseParserTest extends SolrJettyTestBase { NoOpResponseParser parser = new NoOpResponseParser(); try (final InputStream is = getResponse()) { assertNotNull(is); - Reader in = new InputStreamReader(is, "UTF-8"); + Reader in = new InputStreamReader(is, StandardCharsets.UTF_8); NamedList response = parser.processResponse(in); assertNotNull(response.get("response")); String expectedResponse = IOUtils.toString(getResponse(), "UTF-8"); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/QueryResponseTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/QueryResponseTest.java index 1c1b70a9e90..c5692fe1007 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/QueryResponseTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/QueryResponseTest.java @@ -18,6 +18,7 @@ package org.apache.solr.client.solrj.response; import junit.framework.Assert; + import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.common.SolrDocumentList; @@ -29,6 +30,7 @@ import org.junit.Test; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.List; /** @@ -42,7 +44,7 @@ public class QueryResponseTest extends LuceneTestCase { XMLResponseParser parser = new XMLResponseParser(); InputStream is = new SolrResourceLoader(null, null).openResource("solrj/sampleDateFacetResponse.xml"); assertNotNull(is); - Reader in = new InputStreamReader(is, "UTF-8"); + Reader in = new InputStreamReader(is, StandardCharsets.UTF_8); NamedList response = parser.processResponse(in); in.close(); @@ -66,7 +68,7 @@ public class QueryResponseTest extends LuceneTestCase { XMLResponseParser parser = new XMLResponseParser(); InputStream is = new SolrResourceLoader(null, null).openResource("solrj/sampleDateFacetResponse.xml"); assertNotNull(is); - Reader in = new InputStreamReader(is, "UTF-8"); + Reader in = new InputStreamReader(is, StandardCharsets.UTF_8); NamedList response = parser.processResponse(in); in.close(); @@ -123,7 +125,7 @@ public class QueryResponseTest extends LuceneTestCase { XMLResponseParser parser = new XMLResponseParser(); InputStream is = new SolrResourceLoader(null, null).openResource("solrj/sampleGroupResponse.xml"); assertNotNull(is); - Reader in = new InputStreamReader(is, "UTF-8"); + Reader in = new InputStreamReader(is, StandardCharsets.UTF_8); NamedList response = parser.processResponse(in); in.close(); @@ -225,7 +227,7 @@ public class QueryResponseTest extends LuceneTestCase { XMLResponseParser parser = new XMLResponseParser(); InputStream is = new 
SolrResourceLoader(null, null).openResource("solrj/sampleSimpleGroupResponse.xml"); assertNotNull(is); - Reader in = new InputStreamReader(is, "UTF-8"); + Reader in = new InputStreamReader(is, StandardCharsets.UTF_8); NamedList response = parser.processResponse(in); in.close(); diff --git a/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java b/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java index 22c3d85fdb0..2945a189cb6 100644 --- a/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.URL; +import java.nio.charset.StandardCharsets; import org.apache.commons.io.IOUtils; import org.apache.solr.SolrTestCaseJ4; @@ -58,13 +59,15 @@ public class ContentStreamTest extends SolrTestCaseJ4 InputStream s = stream.getStream(); FileInputStream fis = new FileInputStream(file); InputStreamReader isr = new InputStreamReader( - new FileInputStream(file), "UTF-8"); + new FileInputStream(file), StandardCharsets.UTF_8); + Reader r = stream.getReader(); try { assertEquals(file.length(), stream.getSize().intValue()); assertTrue(IOUtils.contentEquals(fis, s)); - assertTrue(IOUtils.contentEquals(isr, stream.getReader())); + assertTrue(IOUtils.contentEquals(isr, r)); } finally { s.close(); + r.close(); isr.close(); fis.close(); } @@ -86,7 +89,7 @@ public class ContentStreamTest extends SolrTestCaseJ4 InputStream s = stream.getStream(); FileInputStream fis = new FileInputStream(file); FileInputStream fis2 = new FileInputStream(file); - InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + InputStreamReader isr = new InputStreamReader(fis, StandardCharsets.UTF_8); Reader r = stream.getReader(); try { assertTrue(IOUtils.contentEquals(fis2, s)); diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java index 1aeb7c572b1..fe13e345703 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java @@ -325,7 +325,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { if (xmlStr == null) { xmlStr = ""; } - FileUtils.write(tmpFile, xmlStr, IOUtils.CHARSET_UTF_8.toString()); + FileUtils.write(tmpFile, xmlStr, IOUtils.UTF_8); SolrResourceLoader loader = new SolrResourceLoader(solrHome.getAbsolutePath()); h = new TestHarness(loader, ConfigSolr.fromFile(loader, new File(solrHome, "solr.xml"))); diff --git a/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java b/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java index 1df75eaacbc..bd2cec16d66 100644 --- a/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java +++ b/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java @@ -20,6 +20,7 @@ package org.apache.solr.analysis; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import org.apache.lucene.analysis.util.ResourceLoader; @@ -51,6 +52,6 @@ class StringMockSolrResourceLoader implements ResourceLoader { @Override public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes("UTF-8")); + return new 
ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); } } diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java index c1595f58c2c..aa7e8d16b88 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java @@ -1136,27 +1136,23 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes Set onlyInB = new HashSet<>(setB); onlyInB.removeAll(setA); - if (onlyInA.size() > 0) { - for (SolrDocument doc : onlyInA) { - if (!addFails.contains(doc.getFirstValue("id"))) { - legal = false; - } else { - System.err.println("###### Only in " + aName + ": " + onlyInA - + ", but this is expected because we found an add fail for " - + doc.getFirstValue("id")); - } + for (SolrDocument doc : onlyInA) { + if (!addFails.contains(doc.getFirstValue("id"))) { + legal = false; + } else { + System.err.println("###### Only in " + aName + ": " + onlyInA + + ", but this is expected because we found an add fail for " + + doc.getFirstValue("id")); } - } - if (onlyInB.size() > 0) { - for (SolrDocument doc : onlyInB) { - if (!deleteFails.contains(doc.getFirstValue("id"))) { - legal = false; - } else { - System.err.println("###### Only in " + bName + ": " + onlyInB - + ", but this is expected because we found a delete fail for " - + doc.getFirstValue("id")); - } + + for (SolrDocument doc : onlyInB) { + if (!deleteFails.contains(doc.getFirstValue("id"))) { + legal = false; + } else { + System.err.println("###### Only in " + bName + ": " + onlyInB + + ", but this is expected because we found a delete fail for " + + doc.getFirstValue("id")); } } @@ -1654,8 +1650,12 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes if (client == null) { final String baseUrl = getBaseUrl((HttpSolrServer) clients.get(clientIndex)); SolrServer server = createNewSolrServer("", baseUrl); - res.setResponse(server.request(request)); - server.shutdown(); + try { + res.setResponse(server.request(request)); + server.shutdown(); + } finally { + if (server != null) server.shutdown(); + } } else { res.setResponse(client.request(request)); } diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java b/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java index c3cc399dd87..9a044a24477 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/ZkTestServer.java @@ -26,6 +26,7 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.Socket; import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -314,7 +315,7 @@ public class ZkTestServer { BufferedReader reader = null; try { OutputStream outstream = sock.getOutputStream(); - outstream.write(cmd.getBytes("US-ASCII")); + outstream.write(cmd.getBytes(StandardCharsets.US_ASCII)); outstream.flush(); // this replicates NC - close the output stream before reading sock.shutdownOutput(); diff --git a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java index 94dba92736d..2b15abb4bb9 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java +++ 
b/solr/test-framework/src/java/org/apache/solr/util/BaseTestHarness.java @@ -28,10 +28,12 @@ import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringWriter; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; abstract public class BaseTestHarness { private static final ThreadLocal builderTL = new ThreadLocal<>(); @@ -80,7 +82,7 @@ abstract public class BaseTestHarness { Document document = null; try { document = getXmlDocumentBuilder().parse(new ByteArrayInputStream - (xml.getBytes("UTF-8"))); + (xml.getBytes(StandardCharsets.UTF_8))); } catch (UnsupportedEncodingException e1) { throw new RuntimeException("Totally weird UTF-8 exception", e1); } catch (IOException e2) { @@ -105,7 +107,7 @@ abstract public class BaseTestHarness { Document document = null; try { document = getXmlDocumentBuilder().parse(new ByteArrayInputStream - (xml.getBytes("UTF-8"))); + (xml.getBytes(StandardCharsets.UTF_8))); } catch (UnsupportedEncodingException e1) { throw new RuntimeException("Totally weird UTF-8 exception", e1); } catch (IOException e2) { diff --git a/solr/test-framework/src/java/org/apache/solr/util/RestTestHarness.java b/solr/test-framework/src/java/org/apache/solr/util/RestTestHarness.java index e298947fd24..0935d6ed5ec 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/RestTestHarness.java +++ b/solr/test-framework/src/java/org/apache/solr/util/RestTestHarness.java @@ -18,6 +18,7 @@ package org.apache.solr.util; import java.io.IOException; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; @@ -106,7 +107,7 @@ public class RestTestHarness extends BaseTestHarness { public String put(String request, String content) throws IOException { HttpPut httpPut = new HttpPut(getBaseURL() + request); httpPut.setEntity(new StringEntity(content, ContentType.create( - "application/json", "utf-8"))); + "application/json", StandardCharsets.UTF_8))); return getResponse(httpPut); } @@ -134,7 +135,7 @@ public class RestTestHarness extends BaseTestHarness { public String post(String request, String content) throws IOException { HttpPost httpPost = new HttpPost(getBaseURL() + request); httpPost.setEntity(new StringEntity(content, ContentType.create( - "application/json", "utf-8"))); + "application/json", StandardCharsets.UTF_8))); return getResponse(httpPost); } @@ -189,7 +190,7 @@ public class RestTestHarness extends BaseTestHarness { HttpEntity entity = null; try { entity = httpClient.execute(request).getEntity(); - return EntityUtils.toString(entity, "UTF-8"); + return EntityUtils.toString(entity, StandardCharsets.UTF_8); } finally { EntityUtils.consumeQuietly(entity); }
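The bulk of this patch applies one mechanical migration: charset names passed as "UTF-8" strings are replaced with java.nio.charset.StandardCharsets constants, which is why hunks such as the SimplePostToolTest and SolrZkClient changes above can also delete their UnsupportedEncodingException handling. A minimal before/after sketch of that pattern, not taken from the patch itself (the class and method names below are illustrative only):

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

// Illustrative class, not part of the patch.
public class CharsetMigrationSketch {

  // Old style: String.getBytes(String) declares the checked
  // UnsupportedEncodingException, so callers need a catch block that can
  // never fire for UTF-8.
  static InputStream oldStyle(String text) {
    try {
      return new ByteArrayInputStream(text.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e); // can't happen - UTF-8 is always supported
    }
  }

  // New style: String.getBytes(Charset) throws no checked exception, the
  // charset is resolved once as a constant, and the boilerplate disappears.
  static InputStream newStyle(String text) {
    return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
  }
}

The reader and writer call sites changed above (InputStreamReader, OutputStreamWriter) rely on the analogous Charset overloads of those constructors in the same way.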