LUCENE-5952: better error messages when version fails to parse; use simpler string tokenizer; don't check major versions at such a low level

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1626550 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-09-21 09:44:11 +00:00
parent 346e8f52bd
commit f8a74cb587
10 changed files with 266 additions and 71 deletions

View File

@ -158,6 +158,11 @@ Bug Fixes
for every IndexWriter method: instead such disasters will cause IW to close itself
defensively. (Robert Muir, Mike McCandless)
* LUCENE-5952: Improve error messages when version cannot be parsed;
don't check for too old or too new major version (it's too low level
to enforce here); use simple string tokenizer. (Ryan Ernst, Uwe Schindler,
Robert Muir, Mike McCandless)
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect

View File

@ -17,10 +17,6 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@ -28,6 +24,7 @@ import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -40,6 +37,10 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
* Abstract parent class for analysis factories {@link TokenizerFactory},
* {@link TokenFilterFactory} and {@link CharFilterFactory}.
@ -68,7 +69,15 @@ public abstract class AbstractAnalysisFactory {
protected AbstractAnalysisFactory(Map<String,String> args) {
originalArgs = Collections.unmodifiableMap(new HashMap<>(args));
String version = get(args, LUCENE_MATCH_VERSION_PARAM);
luceneMatchVersion = version == null ? null : Version.parseLeniently(version);
if (version == null) {
luceneMatchVersion = null;
} else {
try {
luceneMatchVersion = Version.parseLeniently(version);
} catch (ParseException pe) {
throw new IllegalArgumentException(pe);
}
}
args.remove(CLASS_NAME); // consume the class arg
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene40;
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.Map;
import java.util.Set;
@ -55,7 +56,12 @@ public class Lucene40SegmentInfoReader extends SegmentInfoReader {
CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
Lucene40SegmentInfoFormat.VERSION_START,
Lucene40SegmentInfoFormat.VERSION_CURRENT);
final Version version = Version.parse(input.readString());
final Version version;
try {
version = Version.parse(input.readString());
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);

View File

@ -17,6 +17,15 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
@ -26,8 +35,8 @@ import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
@ -36,14 +45,6 @@ import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NoMergeScheduler;
import org.apache.lucene.util.Version;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
* Create an index. <br>
* Other side effects: index writer object in perfRunData is set. <br>
@ -100,6 +101,12 @@ public class CreateIndexTask extends PerfTask {
public static IndexWriterConfig createWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) {
@SuppressWarnings("deprecation")
Version version;
try {
version = Version.parseLeniently(config.get("writer.version", Version.LATEST.toString()));
} catch (ParseException pe) {
throw new IllegalArgumentException(pe);
}
IndexWriterConfig iwConf = new IndexWriterConfig(runData.getAnalyzer());
iwConf.setOpenMode(mode);
IndexDeletionPolicy indexDeletionPolicy = getIndexDeletionPolicy(config);

View File

@ -19,12 +19,14 @@ package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
@ -62,7 +64,12 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_VERSION);
final Version version = Version.parse(readString(SI_VERSION.length, scratch));
final Version version;
try {
version = Version.parse(readString(SI_VERSION.length, scratch));
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene46;
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.Map;
import java.util.Set;
@ -50,7 +51,13 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
Lucene46SegmentInfoFormat.VERSION_START,
Lucene46SegmentInfoFormat.VERSION_CURRENT);
final Version version = Version.parse(input.readString());
final Version version;
try {
version = Version.parse(input.readString());
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);

View File

@ -0,0 +1,56 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Used for parsing Version strings so we don't have to
* use overkill String.split nor StringTokenizer (which silently
* skips empty tokens). */
final class StrictStringTokenizer {
public StrictStringTokenizer(String s, char delimiter) {
this.s = s;
this.delimiter = delimiter;
}
public final String nextToken() {
if (pos < 0) {
throw new IllegalStateException("no more tokens");
}
int pos1 = s.indexOf(delimiter, pos);
String s1;
if (pos1 >= 0) {
s1 = s.substring(pos, pos1);
pos = pos1+1;
} else {
s1 = s.substring(pos);
pos=-1;
}
return s1;
}
public final boolean hasMoreTokens() {
return pos >= 0;
}
private final String s;
private final char delimiter;
private int pos;
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.text.ParseException;
import java.util.Locale;
/**
@ -260,36 +261,83 @@ public final class Version {
* Part {@code ".bugfix"} and part {@code ".prerelease"} are optional.
* Note that this is forwards compatible: the parsed version does not have to exist as
* a constant.
*
* @lucene.internal
*/
public static Version parse(String version) {
String[] pieces = version.split("\\.");
if (pieces.length < 2 || pieces.length > 4) {
throw new IllegalArgumentException("Version is not in form major.minor.bugfix(.prerelease): " + version);
public static Version parse(String version) throws ParseException {
StrictStringTokenizer tokens = new StrictStringTokenizer(version, '.');
if (tokens.hasMoreTokens() == false) {
throw new ParseException("Version is not in form major.minor.bugfix(.prerelease) (got: " + version + ")", 0);
}
int major;
String token = tokens.nextToken();
try {
major = Integer.parseInt(token);
} catch (NumberFormatException nfe) {
throw new ParseException("Failed to parse major version from \"" + token + "\" (got: " + version + ")", 0);
}
if (tokens.hasMoreTokens() == false) {
throw new ParseException("Version is not in form major.minor.bugfix(.prerelease) (got: " + version + ")", 0);
}
int minor;
token = tokens.nextToken();
try {
minor = Integer.parseInt(token);
} catch (NumberFormatException nfe) {
throw new ParseException("Failed to parse minor version from \"" + token + "\" (got: " + version + ")", 0);
}
int major = Integer.parseInt(pieces[0]);
int minor = Integer.parseInt(pieces[1]);
int bugfix = 0;
int prerelease = 0;
if (pieces.length > 2) {
bugfix = Integer.parseInt(pieces[2]);
}
if (pieces.length > 3) {
prerelease = Integer.parseInt(pieces[3]);
if (prerelease == 0) {
throw new IllegalArgumentException("Invalid value " + prerelease + " for prerelease of version " + version +", should be 1 or 2");
if (tokens.hasMoreTokens()) {
token = tokens.nextToken();
try {
bugfix = Integer.parseInt(token);
} catch (NumberFormatException nfe) {
throw new ParseException("Failed to parse bugfix version from \"" + token + "\" (got: " + version + ")", 0);
}
if (tokens.hasMoreTokens()) {
token = tokens.nextToken();
try {
prerelease = Integer.parseInt(token);
} catch (NumberFormatException nfe) {
throw new ParseException("Failed to parse prerelease version from \"" + token + "\" (got: " + version + ")", 0);
}
if (prerelease == 0) {
throw new ParseException("Invalid value " + prerelease + " for prerelease; should be 1 or 2 (got: " + version + ")", 0);
}
if (tokens.hasMoreTokens()) {
// Too many tokens!
throw new ParseException("Version is not in form major.minor.bugfix(.prerelease) (got: " + version + ")", 0);
}
}
}
return new Version(major, minor, bugfix, prerelease);
try {
return new Version(major, minor, bugfix, prerelease);
} catch (IllegalArgumentException iae) {
ParseException pe = new ParseException("failed to parse version string \"" + version + "\": " + iae.getMessage(), 0);
pe.initCause(iae);
throw pe;
}
}
/**
* Parse the given version number as a constant or dot based version.
* <p>This method allows to use {@code "LUCENE_X_Y"} constant names,
* or version numbers in the format {@code "x.y.z"}.
*
* @lucene.internal
*/
public static Version parseLeniently(String version) {
public static Version parseLeniently(String version) throws ParseException {
String versionOrig = version;
version = version.toUpperCase(Locale.ROOT);
switch (version) {
case "LATEST":
@ -306,7 +354,13 @@ public final class Version {
.replaceFirst("^LUCENE_(\\d+)_(\\d+)_(\\d+)$", "$1.$2.$3")
.replaceFirst("^LUCENE_(\\d+)_(\\d+)$", "$1.$2.0")
.replaceFirst("^LUCENE_(\\d)(\\d)$", "$1.$2.0");
return parse(version);
try {
return parse(version);
} catch (ParseException pe) {
ParseException pe2 = new ParseException("failed to parse lenient version string \"" + versionOrig + "\": " + pe.getMessage(), 0);
pe2.initCause(pe);
throw pe2;
}
}
}
@ -333,23 +387,27 @@ public final class Version {
this.minor = minor;
this.bugfix = bugfix;
this.prerelease = prerelease;
if (major > 6 || major < 4) { // TODO fix this!!!
throw new IllegalArgumentException("Lucene 6.x only supports 6.x, 5.x, and 4.x versions");
// NOTE: do not enforce major version so we remain future proof, except to
// make sure it fits in the 8 bits we encode it into:
if (major > 255 || major < 0) {
throw new IllegalArgumentException("Illegal major version: " + major);
}
if (minor > 255 | minor < 0) {
if (minor > 255 || minor < 0) {
throw new IllegalArgumentException("Illegal minor version: " + minor);
}
if (bugfix > 255 | bugfix < 0) {
if (bugfix > 255 || bugfix < 0) {
throw new IllegalArgumentException("Illegal bugfix version: " + bugfix);
}
if (prerelease > 2 | prerelease < 0) {
if (prerelease > 2 || prerelease < 0) {
throw new IllegalArgumentException("Illegal prerelease version: " + prerelease);
}
if (prerelease != 0 && (minor != 0 || bugfix != 0)) {
throw new IllegalArgumentException("Prerelease version only supported with major release");
throw new IllegalArgumentException("Prerelease version only supported with major release (got prerelease: " + prerelease + ", minor: " + minor + ", bugfix: " + bugfix + ")");
}
encodedValue = major << 18 | minor << 10 | bugfix << 2 | prerelease;
assert encodedIsValid();
}
/**
@ -361,10 +419,6 @@ public final class Version {
@Override
public String toString() {
int major = (encodedValue >>> 18) & 0xFF;
int minor = (encodedValue >>> 10) & 0xFF;
int bugfix = (encodedValue >>> 2) & 0xFF;
int prerelease = encodedValue & 0x3;
if (prerelease == 0) {
return "" + major + "." + minor + "." + bugfix;
}
@ -376,6 +430,15 @@ public final class Version {
return o != null && o instanceof Version && ((Version)o).encodedValue == encodedValue;
}
// Used only by assert:
private boolean encodedIsValid() {
assert major == ((encodedValue >>> 18) & 0xFF);
assert minor == ((encodedValue >>> 10) & 0xFF);
assert bugfix == ((encodedValue >>> 2) & 0xFF);
assert prerelease == (encodedValue & 0x03);
return true;
}
@Override
public int hashCode() {
return encodedValue;

View File

@ -1,3 +1,5 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -15,10 +17,9 @@
* limitations under the License.
*/
package org.apache.lucene.util;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.text.ParseException;
import java.util.Locale;
import java.util.Random;
@ -48,7 +49,7 @@ public class TestVersion extends LuceneTestCase {
assertEquals("4.0.0.2", Version.LUCENE_4_0_0.toString());
}
public void testParseLeniently() {
public void testParseLeniently() throws Exception {
assertEquals(Version.LUCENE_4_9_0, Version.parseLeniently("LUCENE_49"));
assertEquals(Version.LUCENE_4_9_0, Version.parseLeniently("LUCENE_4_9"));
assertEquals(Version.LUCENE_4_9_0, Version.parseLeniently("LUCENE_4_9_0"));
@ -72,20 +73,30 @@ public class TestVersion extends LuceneTestCase {
try {
Version.parseLeniently("LUCENE");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("LUCENE"));
}
try {
Version.parseLeniently("LUCENE_410");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("LUCENE_410"));
}
try {
Version.parseLeniently("LUCENE41");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("LUCENE41"));
}
try {
Version.parseLeniently("LUCENE_6.0.0");
fail();
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("LUCENE_6.0.0"));
}
}
@ -103,97 +114,120 @@ public class TestVersion extends LuceneTestCase {
assertTrue(atLeastOne);
}
public void testParse() {
public void testParse() throws Exception {
assertEquals(Version.LUCENE_5_0_0, Version.parse("5.0.0"));
assertEquals(Version.LUCENE_4_1_0, Version.parse("4.1"));
assertEquals(Version.LUCENE_4_1_0, Version.parse("4.1.0"));
assertEquals(Version.LUCENE_4_0_0_ALPHA, Version.parse("4.0.0"));
assertEquals(Version.LUCENE_4_0_0_BETA, Version.parse("4.0.0.1"));
assertEquals(Version.LUCENE_4_0_0, Version.parse("4.0.0.2"));
// Version does not pass judgement on the major version:
assertEquals(1, Version.parse("1.0").major);
assertEquals(6, Version.parse("6.0.0").major);
}
public void testForwardsCompatibility() {
public void testForwardsCompatibility() throws Exception {
assertTrue(Version.parse("4.7.10").onOrAfter(Version.LUCENE_4_7_2));
assertTrue(Version.parse("4.20.0").onOrAfter(Version.LUCENE_4_8_1));
assertTrue(Version.parse("5.10.20").onOrAfter(Version.LUCENE_5_0_0));
}
public void testParseExceptions() {
try {
Version.parse("1.0");
fail();
} catch (IllegalArgumentException iae) {
// pass
}
try {
Version.parse("LUCENE_4_0_0");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("LUCENE_4_0_0"));
}
try {
Version.parse("4.256");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.256"));
}
try {
Version.parse("4.-1");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.-1"));
}
try {
Version.parse("4.1.256");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.256"));
}
try {
Version.parse("4.1.-1");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.-1"));
}
try {
Version.parse("4.1.1.3");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.1.3"));
}
try {
Version.parse("4.1.1.-1");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.1.-1"));
}
try {
Version.parse("4.1.1.1");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.1.1"));
}
try {
Version.parse("4.1.1.2");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.1.1.2"));
}
try {
Version.parse("4.0.0.0");
fail();
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.0.0.0"));
}
try {
Version.parse("4.0.0.1.42");
fail();
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4.0.0.1.42"));
}
try {
Version.parse("4..0.1");
fail();
} catch (ParseException pe) {
// pass
assertTrue(pe.getMessage().contains("4..0.1"));
}
}
@ -224,7 +258,7 @@ public class TestVersion extends LuceneTestCase {
Version.LATEST.toString(), commonBuildVersion);
}
public void testEqualsHashCode() {
public void testEqualsHashCode() throws Exception {
Random random = random();
String version = "" + (4 + random.nextInt(1)) + "." + random.nextInt(10) + "." + random.nextInt(10);
Version v1 = Version.parseLeniently(version);

View File

@ -48,6 +48,7 @@ import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
@ -441,9 +442,9 @@ public class Config {
final Version version;
try {
version = Version.parseLeniently(matchVersion);
} catch (IllegalArgumentException iae) {
} catch (ParseException pe) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Invalid luceneMatchVersion. Should be of the form 'V.V.V' (e.g. 4.8.0)", iae);
"Invalid luceneMatchVersion. Should be of the form 'V.V.V' (e.g. 4.8.0)", pe);
}
if (version == Version.LATEST && !versionWarningAlreadyLogged.getAndSet(true)) {