mirror of https://github.com/apache/lucene.git
SOLR-7928: Improve CheckIndex to work against HdfsDirectory
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1717340 13f79535-47bb-0310-9956-ffa450edef68
parent a8c41a13c9
commit 562d97a69d
@@ -73,7 +73,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
* @lucene.experimental Please make a complete backup of your
* index before using this to exorcise corrupted documents from your index!
*/
public class CheckIndex implements Closeable {
public final class CheckIndex implements Closeable {

private PrintStream infoStream;
private Directory dir;
@@ -2297,7 +2297,11 @@ public class CheckIndex implements Closeable {
return true;
}

private static boolean assertsOn() {
/**
* Check whether asserts are enabled or not.
* @return true iff asserts are enabled
*/
public static boolean assertsOn() {
assert testAsserts();
return assertsOn;
}
@@ -2338,11 +2342,11 @@ public class CheckIndex implements Closeable {
int exitCode = doMain(args);
System.exit(exitCode);
}

// actual main: returns exit code instead of terminating JVM (for easy testing)
@SuppressForbidden(reason = "System.out required: command line tool")
private static int doMain(String args[]) throws IOException, InterruptedException {

/**
* Run-time configuration options for CheckIndex commands.
*/
public static class Options {
boolean doExorcise = false;
boolean doCrossCheckTermVectors = false;
boolean verbose = false;
@@ -2350,44 +2354,113 @@ public class CheckIndex implements Closeable {
List<String> onlySegments = new ArrayList<>();
String indexPath = null;
String dirImpl = null;
PrintStream out = null;

/** Sole constructor. */
public Options() {}

/**
* Get the name of the FSDirectory implementation class to use.
*/
public String getDirImpl() {
return dirImpl;
}

/**
* Get the directory containing the index.
*/
public String getIndexPath() {
return indexPath;
}

/**
* Set the PrintStream to use for reporting results.
*/
public void setOut(PrintStream out) {
this.out = out;
}
}

// actual main: returns exit code instead of terminating JVM (for easy testing)
@SuppressForbidden(reason = "System.out required: command line tool")
private static int doMain(String args[]) throws IOException, InterruptedException {
Options opts;
try {
opts = parseOptions(args);
} catch (IllegalArgumentException e) {
System.out.println(e.getMessage());
return 1;
}

if (!assertsOn())
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

System.out.println("\nOpening index @ " + opts.indexPath + "\n");
Directory directory = null;
Path path = Paths.get(opts.indexPath);
try {
if (opts.dirImpl == null) {
directory = FSDirectory.open(path);
} else {
directory = CommandLineUtil.newFSDirectory(opts.dirImpl, path);
}
} catch (Throwable t) {
System.out.println("ERROR: could not open directory \"" + opts.indexPath + "\"; exiting");
t.printStackTrace(System.out);
return 1;
}

try (Directory dir = directory;
CheckIndex checker = new CheckIndex(dir)) {
opts.out = System.out;
return checker.doCheck(opts);
}
}

/**
* Parse command line args into fields
* @param args The command line arguments
* @return An Options struct
* @throws IllegalArgumentException if any of the CLI args are invalid
*/
public static Options parseOptions(String[] args) {
Options opts = new Options();

int i = 0;
while(i < args.length) {
String arg = args[i];
if ("-fast".equals(arg)) {
doChecksumsOnly = true;
opts.doChecksumsOnly = true;
} else if ("-exorcise".equals(arg)) {
doExorcise = true;
opts.doExorcise = true;
} else if ("-crossCheckTermVectors".equals(arg)) {
doCrossCheckTermVectors = true;
opts.doCrossCheckTermVectors = true;
} else if (arg.equals("-verbose")) {
verbose = true;
opts.verbose = true;
} else if (arg.equals("-segment")) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -segment option");
return 1;
throw new IllegalArgumentException("ERROR: missing name for -segment option");
}
i++;
onlySegments.add(args[i]);
opts.onlySegments.add(args[i]);
} else if ("-dir-impl".equals(arg)) {
if (i == args.length - 1) {
System.out.println("ERROR: missing value for -dir-impl option");
return 1;
throw new IllegalArgumentException("ERROR: missing value for -dir-impl option");
}
i++;
dirImpl = args[i];
opts.dirImpl = args[i];
} else {
if (indexPath != null) {
System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
return 1;
if (opts.indexPath != null) {
throw new IllegalArgumentException("ERROR: unexpected extra argument '" + args[i] + "'");
}
indexPath = args[i];
opts.indexPath = args[i];
}
i++;
}

if (indexPath == null) {
System.out.println("\nERROR: index path not specified");
System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
if (opts.indexPath == null) {
throw new IllegalArgumentException("\nERROR: index path not specified" +
"\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" +
" -exorcise: actually write a new segments_N file, removing any problematic segments\n" +
" -fast: just verify file checksums, omitting logical integrity checks\n" +
@@ -2413,73 +2486,58 @@ public class CheckIndex implements Closeable {
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
return 1;
}

if (!assertsOn())
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

if (onlySegments.size() == 0)
onlySegments = null;
else if (doExorcise) {
System.out.println("ERROR: cannot specify both -exorcise and -segment");
return 1;
if (opts.onlySegments.size() == 0) {
opts.onlySegments = null;
} else if (opts.doExorcise) {
throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
}

if (doChecksumsOnly && doCrossCheckTermVectors) {
System.out.println("ERROR: cannot specify both -fast and -crossCheckTermVectors");
if (opts.doChecksumsOnly && opts.doCrossCheckTermVectors) {
throw new IllegalArgumentException("ERROR: cannot specify both -fast and -crossCheckTermVectors");
}

return opts;
}

/**
* Actually perform the index check
* @param opts The options to use for this check
* @return 0 iff the index is clean, 1 otherwise
*/
public int doCheck(Options opts) throws IOException, InterruptedException {
setCrossCheckTermVectors(opts.doCrossCheckTermVectors);
setChecksumsOnly(opts.doChecksumsOnly);
setInfoStream(opts.out, opts.verbose);

Status result = checkIndex(opts.onlySegments);
if (result.missingSegments) {
return 1;
}

System.out.println("\nOpening index @ " + indexPath + "\n");
Directory directory = null;
Path path = Paths.get(indexPath);
try {
if (dirImpl == null) {
directory = FSDirectory.open(path);
if (!result.clean) {
if (!opts.doExorcise) {
opts.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
} else {
directory = CommandLineUtil.newFSDirectory(dirImpl, path);
}
} catch (Throwable t) {
System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
t.printStackTrace(System.out);
return 1;
}

try (Directory dir = directory;
CheckIndex checker = new CheckIndex(dir)) {
checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
checker.setChecksumsOnly(doChecksumsOnly);
checker.setInfoStream(System.out, verbose);

Status result = checker.checkIndex(onlySegments);
if (result.missingSegments) {
return 1;
}

if (!result.clean) {
if (!doExorcise) {
System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
} else {
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
Thread.sleep(1000);
System.out.println(" " + (5-s) + "...");
}
System.out.println("Writing...");
checker.exorciseIndex(result);
System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
opts.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
opts.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
Thread.sleep(1000);
opts.out.println(" " + (5-s) + "...");
}
opts.out.println("Writing...");
exorciseIndex(result);
opts.out.println("OK");
opts.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
}
System.out.println("");

if (result.clean == true) {
return 0;
} else {
return 1;
}
}
opts.out.println("");

if (result.clean == true) {
return 0;
} else {
return 1;
}
}
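The CheckIndex hunks above turn the old command-line-only plumbing into a small public API: Options holds the parsed settings, parseOptions(String[]) builds one (throwing IllegalArgumentException instead of printing usage and returning 1), assertsOn() becomes public, and doCheck(Options) runs the check and returns the exit code, leaving doMain() as a thin wrapper. As a rough, hypothetical sketch only (not part of this commit; the class name and the choice of FSDirectory are illustrative), an external caller could drive that API like this:

// Illustrative sketch, not part of the commit: driving the refactored CheckIndex API.
import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexApiSketch {                        // hypothetical class name
  public static int check(String[] args) throws IOException, InterruptedException {
    // parseOptions throws IllegalArgumentException on bad input instead of printing usage
    CheckIndex.Options opts = CheckIndex.parseOptions(args);
    opts.setOut(System.out);                              // where doCheck should report
    try (Directory dir = FSDirectory.open(Paths.get(opts.getIndexPath()));
         CheckIndex checker = new CheckIndex(dir)) {      // any Directory implementation works here
      return checker.doCheck(opts);                       // 0 iff the index is clean
    }
  }
}

Because doCheck(Options) operates on whatever Directory the CheckIndex was constructed with, the same sequence works for a non-filesystem directory, which is what the new CheckHdfsIndex below does with HdfsDirectory.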
@@ -17,177 +17,48 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;

public class TestCheckIndex extends LuceneTestCase {
public class TestCheckIndex extends BaseTestCheckIndex {
private Directory directory;

@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
}

@Override
public void tearDown() throws Exception {
directory.close();
super.tearDown();
}

@Test
public void testDeletedDocs() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(2));
for(int i=0;i<19;i++) {
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
doc.add(newField("field", "aaa"+i, customType));
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.commit();
writer.deleteDocuments(new Term("field","aaa5"));
writer.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString(IOUtils.UTF_8));
fail();
}

final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
assertTrue(seg.openReaderPassed);

assertNotNull(seg.diagnostics);

assertNotNull(seg.fieldNormStatus);
assertNull(seg.fieldNormStatus.error);
assertEquals(1, seg.fieldNormStatus.totFields);

assertNotNull(seg.termIndexStatus);
assertNull(seg.termIndexStatus.error);
assertEquals(18, seg.termIndexStatus.termCount);
assertEquals(18, seg.termIndexStatus.totFreq);
assertEquals(18, seg.termIndexStatus.totPos);

assertNotNull(seg.storedFieldStatus);
assertNull(seg.storedFieldStatus.error);
assertEquals(18, seg.storedFieldStatus.docCount);
assertEquals(18, seg.storedFieldStatus.totFields);

assertNotNull(seg.termVectorStatus);
assertNull(seg.termVectorStatus.error);
assertEquals(18, seg.termVectorStatus.docCount);
assertEquals(18, seg.termVectorStatus.totVectors);

assertNotNull(seg.diagnostics.get("java.vm.version"));
assertNotNull(seg.diagnostics.get("java.runtime.version"));

assertTrue(seg.diagnostics.size() > 0);
final List<String> onlySegments = new ArrayList<>();
onlySegments.add("_0");

assertTrue(checker.checkIndex(onlySegments).clean == true);
checker.close();
dir.close();
testDeletedDocs(directory);
}

// LUCENE-4221: we have to let these thru, for now
@Test
public void testBogusTermVectors() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(true);
Field field = new Field("foo", "", ft);
field.setTokenStream(new CannedTokenStream(
new Token("bar", 5, 10), new Token("bar", 1, 4)
));
doc.add(field);
iw.addDocument(doc);
iw.close();
dir.close(); // checkindex
testBogusTermVectors(directory);
}

@Test
public void testChecksumsOnly() throws IOException {
LineFileDocs lf = new LineFileDocs(random());
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
dir.close();
analyzer.close();
testChecksumsOnly(directory);
}

@Test
public void testChecksumsOnlyVerbose() throws IOException {
LineFileDocs lf = new LineFileDocs(random());
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
dir.close();
analyzer.close();
testChecksumsOnlyVerbose(directory);
}


@Test
public void testObtainsLock() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
iw.addDocument(new Document());
iw.commit();

// keep IW open...
try {
new CheckIndex(dir);
fail("should not have obtained write lock");
} catch (LockObtainFailedException expected) {
// ok
}

iw.close();
dir.close();
testObtainsLock(directory);
}
}
@@ -0,0 +1,187 @@
package org.apache.lucene.index;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
* Base class for CheckIndex tests.
*/
public class BaseTestCheckIndex extends LuceneTestCase {

public void testDeletedDocs(Directory dir) throws IOException {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(2));
for(int i=0;i<19;i++) {
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
doc.add(newField("field", "aaa"+i, customType));
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.commit();
writer.deleteDocuments(new Term("field","aaa5"));
writer.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString(IOUtils.UTF_8));
fail();
}

final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
assertTrue(seg.openReaderPassed);

assertNotNull(seg.diagnostics);

assertNotNull(seg.fieldNormStatus);
assertNull(seg.fieldNormStatus.error);
assertEquals(1, seg.fieldNormStatus.totFields);

assertNotNull(seg.termIndexStatus);
assertNull(seg.termIndexStatus.error);
assertEquals(18, seg.termIndexStatus.termCount);
assertEquals(18, seg.termIndexStatus.totFreq);
assertEquals(18, seg.termIndexStatus.totPos);

assertNotNull(seg.storedFieldStatus);
assertNull(seg.storedFieldStatus.error);
assertEquals(18, seg.storedFieldStatus.docCount);
assertEquals(18, seg.storedFieldStatus.totFields);

assertNotNull(seg.termVectorStatus);
assertNull(seg.termVectorStatus.error);
assertEquals(18, seg.termVectorStatus.docCount);
assertEquals(18, seg.termVectorStatus.totVectors);

assertNotNull(seg.diagnostics.get("java.vm.version"));
assertNotNull(seg.diagnostics.get("java.runtime.version"));

assertTrue(seg.diagnostics.size() > 0);
final List<String> onlySegments = new ArrayList<>();
onlySegments.add("_0");

assertTrue(checker.checkIndex(onlySegments).clean == true);
checker.close();
}

// LUCENE-4221: we have to let these thru, for now
public void testBogusTermVectors(Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(true);
Field field = new Field("foo", "", ft);
field.setTokenStream(new CannedTokenStream(
new Token("bar", 5, 10), new Token("bar", 1, 4)
));
doc.add(field);
iw.addDocument(doc);
iw.close();
}

public void testChecksumsOnly(Directory dir) throws IOException {
LineFileDocs lf = new LineFileDocs(random());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
analyzer.close();
}

public void testChecksumsOnlyVerbose(Directory dir) throws IOException {
LineFileDocs lf = new LineFileDocs(random());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
analyzer.close();
}

public void testObtainsLock(Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
iw.addDocument(new Document());
iw.commit();

// keep IW open...
try {
new CheckIndex(dir);
fail("should not have obtained write lock");
} catch (LockObtainFailedException expected) {
// ok
}

iw.close();
}
}
@@ -182,6 +182,9 @@ Detailed Change List
New Features
----------------------

* SOLR-7928: Improve CheckIndex to work against HdfsDirectory
  (Mike Drob, Gregory Chanan)

Other Changes
----------------------
@@ -0,0 +1,80 @@
package org.apache.solr.index.hdfs;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.apache.solr.util.HdfsUtil;

public class CheckHdfsIndex {
public static void main(String[] args) throws IOException, InterruptedException {
int exitCode = doMain(args);
System.exit(exitCode);
}

// actual main: returns exit code instead of terminating JVM (for easy testing)
@SuppressForbidden(reason = "System.out required: command line tool")
protected static int doMain(String[] args) throws IOException, InterruptedException {
CheckIndex.Options opts;
try {
opts = CheckIndex.parseOptions(args);
} catch (IllegalArgumentException e) {
System.out.println(e.getMessage());
return 1;
}

if (!CheckIndex.assertsOn()) {
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
}

if (opts.getDirImpl() != null) {
System.out.println("\nIgnoring specified -dir-impl, instead using " + HdfsDirectory.class.getSimpleName());
}

System.out.println("\nOpening index @ " + opts.getIndexPath() + "\n");

Directory directory;
try {
directory = new HdfsDirectory(new Path(opts.getIndexPath()), getConf());
} catch (IOException e) {
System.out.println("ERROR: could not open hdfs directory \"" + opts.getIndexPath() + "\"; exiting");
e.printStackTrace(System.out);
return 1;
}

try (Directory dir = directory; CheckIndex checker = new CheckIndex(dir)) {
opts.setOut(System.out);
return checker.doCheck(opts);
}
}

private static Configuration getConf() {
Configuration conf = new Configuration();
String confDir = System.getProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY);
HdfsUtil.addHdfsResources(conf, confDir);
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
return conf;
}
}
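CheckHdfsIndex reuses the same CheckIndex API but always opens the index through HdfsDirectory, ignoring any -dir-impl argument and picking up HDFS client settings from the configuration directory named by the HdfsDirectoryFactory.CONFIG_DIRECTORY system property. From a shell it would be launched like CheckIndex itself, with the Solr and Hadoop client jars on the classpath and an HDFS index path as the argument. Purely as a hypothetical sketch (not in the commit; the class name and HDFS URI are placeholders), it can also be driven programmatically the way the new CheckHdfsIndexTest does, since doMain() is protected and therefore reachable from the same package:

// Illustrative sketch, not part of the commit: running CheckHdfsIndex programmatically.
package org.apache.solr.index.hdfs;            // doMain() is protected, so stay in its package

public class CheckHdfsIndexUsageSketch {        // hypothetical class name
  public static void main(String[] args) throws Exception {
    String indexDir = "hdfs://namenode:8020/solr/collection1/core_node1/data/index"; // placeholder URI
    int rc = CheckHdfsIndex.doMain(new String[] { indexDir, "-fast" }); // -fast: checksum-only check
    System.out.println("CheckHdfsIndex exit code: " + rc);
  }
}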
@@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* An HDFS CheckIndex implementation.
*/
package org.apache.solr.index.hdfs;
|
|||
package org.apache.solr.index.hdfs;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.lucene.index.BaseTestCheckIndex;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NoLockFactory;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
|
||||
import org.apache.solr.cloud.hdfs.HdfsTestUtil;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.store.hdfs.HdfsDirectory;
|
||||
import org.apache.solr.util.BadHdfsThreadsFilter;
|
||||
import org.junit.After;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
|
||||
|
||||
@ThreadLeakFilters(defaultFilters = true, filters = {
|
||||
BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
|
||||
})
|
||||
public class CheckHdfsIndexTest extends AbstractFullDistribZkTestBase {
|
||||
private static MiniDFSCluster dfsCluster;
|
||||
private static Path path;
|
||||
|
||||
private BaseTestCheckIndex testCheckIndex;
|
||||
private Directory directory;
|
||||
|
||||
public CheckHdfsIndexTest() {
|
||||
super();
|
||||
sliceCount = 1;
|
||||
fixShardCount(1);
|
||||
|
||||
testCheckIndex = new BaseTestCheckIndex();
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void setupClass() throws Exception {
|
||||
dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
|
||||
path = new Path(HdfsTestUtil.getURI(dfsCluster) + "/solr/");
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void teardownClass() throws Exception {
|
||||
HdfsTestUtil.teardownClass(dfsCluster);
|
||||
dfsCluster = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
|
||||
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
||||
|
||||
directory = new HdfsDirectory(path, NoLockFactory.INSTANCE, conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
directory.close();
|
||||
dfsCluster.getFileSystem().delete(path, true);
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getDataDir(String dataDir) throws IOException {
|
||||
return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void doTest() throws Exception {
|
||||
indexr(id, 1);
|
||||
commit();
|
||||
|
||||
waitForRecoveriesToFinish(false);
|
||||
|
||||
String[] args;
|
||||
{
|
||||
SolrClient client = clients.get(0);
|
||||
NamedList<Object> response = client.query(new SolrQuery().setRequestHandler("/admin/system")).getResponse();
|
||||
NamedList<Object> coreInfo = (NamedList<Object>) response.get("core");
|
||||
String indexDir = (String) ((NamedList<Object>) coreInfo.get("directory")).get("data") + "/index";
|
||||
|
||||
args = new String[] {indexDir};
|
||||
}
|
||||
|
||||
assertEquals("CheckHdfsIndex return status", 0, CheckHdfsIndex.doMain(args));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeletedDocs() throws IOException {
|
||||
testCheckIndex.testDeletedDocs(directory);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBogusTermVectors() throws IOException {
|
||||
testCheckIndex.testBogusTermVectors(directory);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChecksumsOnly() throws IOException {
|
||||
testCheckIndex.testChecksumsOnly(directory);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testChecksumsOnlyVerbose() throws IOException {
|
||||
testCheckIndex.testChecksumsOnlyVerbose(directory);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("We explicitly use a NoLockFactory, so this test doesn't make sense.")
|
||||
public void testObtainsLock() throws IOException {
|
||||
testCheckIndex.testObtainsLock(directory);
|
||||
}
|
||||
}