SOLR-7928: Improve CheckIndex to work against HdfsDirectory

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1717340 13f79535-47bb-0310-9956-ffa450edef68
Gregory Chanan 2015-11-30 22:46:48 +00:00
parent a8c41a13c9
commit 562d97a69d
7 changed files with 599 additions and 235 deletions

org/apache/lucene/index/CheckIndex.java

@@ -73,7 +73,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
* @lucene.experimental Please make a complete backup of your
* index before using this to exorcise corrupted documents from your index!
*/
- public class CheckIndex implements Closeable {
+ public final class CheckIndex implements Closeable {
private PrintStream infoStream;
private Directory dir;
@@ -2297,7 +2297,11 @@ public class CheckIndex implements Closeable {
return true;
}
- private static boolean assertsOn() {
+ /**
+ * Check whether asserts are enabled or not.
+ * @return true iff asserts are enabled
+ */
+ public static boolean assertsOn() {
assert testAsserts();
return assertsOn;
}
@@ -2339,10 +2343,10 @@ public class CheckIndex implements Closeable {
System.exit(exitCode);
}
- // actual main: returns exit code instead of terminating JVM (for easy testing)
- @SuppressForbidden(reason = "System.out required: command line tool")
- private static int doMain(String args[]) throws IOException, InterruptedException {
+ /**
+ * Run-time configuration options for CheckIndex commands.
+ */
+ public static class Options {
boolean doExorcise = false;
boolean doCrossCheckTermVectors = false;
boolean verbose = false;
@@ -2350,44 +2354,113 @@ public class CheckIndex implements Closeable {
List<String> onlySegments = new ArrayList<>();
String indexPath = null;
String dirImpl = null;
+ PrintStream out = null;
+ /** Sole constructor. */
+ public Options() {}
+ /**
+ * Get the name of the FSDirectory implementation class to use.
+ */
+ public String getDirImpl() {
+ return dirImpl;
+ }
+ /**
+ * Get the directory containing the index.
+ */
+ public String getIndexPath() {
+ return indexPath;
+ }
+ /**
+ * Set the PrintStream to use for reporting results.
+ */
+ public void setOut(PrintStream out) {
+ this.out = out;
+ }
+ }
+ // actual main: returns exit code instead of terminating JVM (for easy testing)
+ @SuppressForbidden(reason = "System.out required: command line tool")
+ private static int doMain(String args[]) throws IOException, InterruptedException {
+ Options opts;
+ try {
+ opts = parseOptions(args);
+ } catch (IllegalArgumentException e) {
+ System.out.println(e.getMessage());
+ return 1;
+ }
+ if (!assertsOn())
+ System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+ System.out.println("\nOpening index @ " + opts.indexPath + "\n");
+ Directory directory = null;
+ Path path = Paths.get(opts.indexPath);
+ try {
+ if (opts.dirImpl == null) {
+ directory = FSDirectory.open(path);
+ } else {
+ directory = CommandLineUtil.newFSDirectory(opts.dirImpl, path);
+ }
+ } catch (Throwable t) {
+ System.out.println("ERROR: could not open directory \"" + opts.indexPath + "\"; exiting");
+ t.printStackTrace(System.out);
+ return 1;
+ }
+ try (Directory dir = directory;
+ CheckIndex checker = new CheckIndex(dir)) {
+ opts.out = System.out;
+ return checker.doCheck(opts);
+ }
+ }
+ /**
+ * Parse command line args into fields
+ * @param args The command line arguments
+ * @return An Options struct
+ * @throws IllegalArgumentException if any of the CLI args are invalid
+ */
+ public static Options parseOptions(String[] args) {
+ Options opts = new Options();
int i = 0;
while(i < args.length) {
String arg = args[i];
if ("-fast".equals(arg)) {
- doChecksumsOnly = true;
+ opts.doChecksumsOnly = true;
} else if ("-exorcise".equals(arg)) {
- doExorcise = true;
+ opts.doExorcise = true;
} else if ("-crossCheckTermVectors".equals(arg)) {
- doCrossCheckTermVectors = true;
+ opts.doCrossCheckTermVectors = true;
} else if (arg.equals("-verbose")) {
- verbose = true;
+ opts.verbose = true;
} else if (arg.equals("-segment")) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -segment option");
return 1;
throw new IllegalArgumentException("ERROR: missing name for -segment option");
}
i++;
- onlySegments.add(args[i]);
+ opts.onlySegments.add(args[i]);
} else if ("-dir-impl".equals(arg)) {
if (i == args.length - 1) {
System.out.println("ERROR: missing value for -dir-impl option");
return 1;
throw new IllegalArgumentException("ERROR: missing value for -dir-impl option");
}
i++;
- dirImpl = args[i];
+ opts.dirImpl = args[i];
} else {
- if (indexPath != null) {
- System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
- return 1;
+ if (opts.indexPath != null) {
+ throw new IllegalArgumentException("ERROR: unexpected extra argument '" + args[i] + "'");
}
- indexPath = args[i];
+ opts.indexPath = args[i];
}
i++;
}
- if (indexPath == null) {
- System.out.println("\nERROR: index path not specified");
- System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
+ if (opts.indexPath == null) {
+ throw new IllegalArgumentException("\nERROR: index path not specified" +
+ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" +
" -exorcise: actually write a new segments_N file, removing any problematic segments\n" +
" -fast: just verify file checksums, omitting logical integrity checks\n" +
@@ -2413,67 +2486,53 @@ public class CheckIndex implements Closeable {
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
- return 1;
}
- if (!assertsOn())
- System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
- if (onlySegments.size() == 0)
- onlySegments = null;
- else if (doExorcise) {
- System.out.println("ERROR: cannot specify both -exorcise and -segment");
- return 1;
+ if (opts.onlySegments.size() == 0) {
+ opts.onlySegments = null;
+ } else if (opts.doExorcise) {
+ throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
}
- if (doChecksumsOnly && doCrossCheckTermVectors) {
- System.out.println("ERROR: cannot specify both -fast and -crossCheckTermVectors");
- return 1;
+ if (opts.doChecksumsOnly && opts.doCrossCheckTermVectors) {
+ throw new IllegalArgumentException("ERROR: cannot specify both -fast and -crossCheckTermVectors");
}
System.out.println("\nOpening index @ " + indexPath + "\n");
Directory directory = null;
Path path = Paths.get(indexPath);
try {
if (dirImpl == null) {
directory = FSDirectory.open(path);
} else {
directory = CommandLineUtil.newFSDirectory(dirImpl, path);
}
} catch (Throwable t) {
System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
t.printStackTrace(System.out);
return 1;
return opts;
}
- try (Directory dir = directory;
- CheckIndex checker = new CheckIndex(dir)) {
- checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
- checker.setChecksumsOnly(doChecksumsOnly);
- checker.setInfoStream(System.out, verbose);
+ /**
+ * Actually perform the index check
+ * @param opts The options to use for this check
+ * @return 0 iff the index is clean, 1 otherwise
+ */
+ public int doCheck(Options opts) throws IOException, InterruptedException {
+ setCrossCheckTermVectors(opts.doCrossCheckTermVectors);
+ setChecksumsOnly(opts.doChecksumsOnly);
+ setInfoStream(opts.out, opts.verbose);
- Status result = checker.checkIndex(onlySegments);
+ Status result = checkIndex(opts.onlySegments);
if (result.missingSegments) {
return 1;
}
if (!result.clean) {
- if (!doExorcise) {
- System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
+ if (!opts.doExorcise) {
+ opts.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -exorcise were specified\n");
} else {
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
opts.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
opts.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. YOU WILL LOSE DATA. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
Thread.sleep(1000);
System.out.println(" " + (5-s) + "...");
opts.out.println(" " + (5-s) + "...");
}
System.out.println("Writing...");
checker.exorciseIndex(result);
System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
opts.out.println("Writing...");
exorciseIndex(result);
opts.out.println("OK");
opts.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
}
}
System.out.println("");
opts.out.println("");
if (result.clean == true) {
return 0;
@@ -2481,7 +2540,6 @@ public class CheckIndex implements Closeable {
return 1;
}
}
- }
private static double nsToSec(long ns) {
return ns/1000000000.0;
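
The refactoring above splits the old monolithic doMain into three reusable pieces: a public Options holder, a parseOptions step that signals bad arguments with IllegalArgumentException instead of printing and returning, and an instance-level doCheck that runs against whatever Directory the caller opened. A minimal sketch of driving the new API programmatically (the index path is a placeholder):

import java.nio.file.Paths;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexApiSketch {
  public static void main(String[] args) throws Exception {
    // Validate CLI-style arguments; throws IllegalArgumentException on bad input.
    CheckIndex.Options opts = CheckIndex.parseOptions(new String[] {"/path/to/index", "-fast"});
    opts.setOut(System.out); // doCheck reports through this stream
    try (Directory dir = FSDirectory.open(Paths.get(opts.getIndexPath()));
         CheckIndex checker = new CheckIndex(dir)) {
      int rc = checker.doCheck(opts); // 0 iff the index is clean
      System.out.println("CheckIndex returned " + rc);
    }
  }
}

This decoupling is what CheckHdfsIndex below relies on: it reuses parseOptions and doCheck while substituting its own Directory implementation.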

org/apache/lucene/index/TestCheckIndex.java

@@ -17,177 +17,48 @@ package org.apache.lucene.index;
* limitations under the License.
*/
- import java.io.ByteArrayOutputStream;
import java.io.IOException;
- import java.io.PrintStream;
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.lucene.analysis.CannedTokenStream;
- import org.apache.lucene.analysis.MockAnalyzer;
- import org.apache.lucene.analysis.Token;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.FieldType;
- import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.util.IOUtils;
- import org.apache.lucene.util.LineFileDocs;
- import org.apache.lucene.util.LuceneTestCase;
- import org.apache.lucene.util.TestUtil;
import org.junit.Test;
- public class TestCheckIndex extends LuceneTestCase {
+ public class TestCheckIndex extends BaseTestCheckIndex {
+ private Directory directory;
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ directory = newDirectory();
+ }
+ @Override
+ public void tearDown() throws Exception {
+ directory.close();
+ super.tearDown();
+ }
@Test
public void testDeletedDocs() throws IOException {
- Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
- .setMaxBufferedDocs(2));
- for(int i=0;i<19;i++) {
- Document doc = new Document();
- FieldType customType = new FieldType(TextField.TYPE_STORED);
- customType.setStoreTermVectors(true);
- customType.setStoreTermVectorPositions(true);
- customType.setStoreTermVectorOffsets(true);
- doc.add(newField("field", "aaa"+i, customType));
- writer.addDocument(doc);
- }
- writer.forceMerge(1);
- writer.commit();
- writer.deleteDocuments(new Term("field","aaa5"));
- writer.close();
- ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- CheckIndex checker = new CheckIndex(dir);
- checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
- if (VERBOSE) checker.setInfoStream(System.out);
- CheckIndex.Status indexStatus = checker.checkIndex();
- if (indexStatus.clean == false) {
- System.out.println("CheckIndex failed");
- System.out.println(bos.toString(IOUtils.UTF_8));
- fail();
+ testDeletedDocs(directory);
}
- final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
- assertTrue(seg.openReaderPassed);
- assertNotNull(seg.diagnostics);
- assertNotNull(seg.fieldNormStatus);
- assertNull(seg.fieldNormStatus.error);
- assertEquals(1, seg.fieldNormStatus.totFields);
- assertNotNull(seg.termIndexStatus);
- assertNull(seg.termIndexStatus.error);
- assertEquals(18, seg.termIndexStatus.termCount);
- assertEquals(18, seg.termIndexStatus.totFreq);
- assertEquals(18, seg.termIndexStatus.totPos);
- assertNotNull(seg.storedFieldStatus);
- assertNull(seg.storedFieldStatus.error);
- assertEquals(18, seg.storedFieldStatus.docCount);
- assertEquals(18, seg.storedFieldStatus.totFields);
- assertNotNull(seg.termVectorStatus);
- assertNull(seg.termVectorStatus.error);
- assertEquals(18, seg.termVectorStatus.docCount);
- assertEquals(18, seg.termVectorStatus.totVectors);
- assertNotNull(seg.diagnostics.get("java.vm.version"));
- assertNotNull(seg.diagnostics.get("java.runtime.version"));
- assertTrue(seg.diagnostics.size() > 0);
- final List<String> onlySegments = new ArrayList<>();
- onlySegments.add("_0");
- assertTrue(checker.checkIndex(onlySegments).clean == true);
- checker.close();
- dir.close();
- }
// LUCENE-4221: we have to let these thru, for now
@Test
public void testBogusTermVectors() throws IOException {
- Directory dir = newDirectory();
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
- Document doc = new Document();
- FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
- ft.setStoreTermVectors(true);
- ft.setStoreTermVectorOffsets(true);
- Field field = new Field("foo", "", ft);
- field.setTokenStream(new CannedTokenStream(
- new Token("bar", 5, 10), new Token("bar", 1, 4)
- ));
- doc.add(field);
- iw.addDocument(doc);
- iw.close();
- dir.close(); // checkindex
+ testBogusTermVectors(directory);
}
@Test
public void testChecksumsOnly() throws IOException {
- LineFileDocs lf = new LineFileDocs(random());
- Directory dir = newDirectory();
- MockAnalyzer analyzer = new MockAnalyzer(random());
- analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
- for (int i = 0; i < 100; i++) {
- iw.addDocument(lf.nextDoc());
- }
- iw.addDocument(new Document());
- iw.commit();
- iw.close();
- lf.close();
- ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- CheckIndex checker = new CheckIndex(dir);
- checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
- if (VERBOSE) checker.setInfoStream(System.out);
- CheckIndex.Status indexStatus = checker.checkIndex();
- assertTrue(indexStatus.clean);
- checker.close();
- dir.close();
- analyzer.close();
+ testChecksumsOnly(directory);
}
@Test
public void testChecksumsOnlyVerbose() throws IOException {
- LineFileDocs lf = new LineFileDocs(random());
- Directory dir = newDirectory();
- MockAnalyzer analyzer = new MockAnalyzer(random());
- analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
- for (int i = 0; i < 100; i++) {
- iw.addDocument(lf.nextDoc());
- }
- iw.addDocument(new Document());
- iw.commit();
- iw.close();
- lf.close();
- ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- CheckIndex checker = new CheckIndex(dir);
- checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
- if (VERBOSE) checker.setInfoStream(System.out);
- CheckIndex.Status indexStatus = checker.checkIndex();
- assertTrue(indexStatus.clean);
- checker.close();
- dir.close();
- analyzer.close();
+ testChecksumsOnlyVerbose(directory);
}
@Test
public void testObtainsLock() throws IOException {
- Directory dir = newDirectory();
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
- iw.addDocument(new Document());
- iw.commit();
- // keep IW open...
- try {
- new CheckIndex(dir);
- fail("should not have obtained write lock");
- } catch (LockObtainFailedException expected) {
- // ok
- }
- iw.close();
- dir.close();
+ testObtainsLock(directory);
}
}

org/apache/lucene/index/BaseTestCheckIndex.java

@@ -0,0 +1,187 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Base class for CheckIndex tests.
*/
public class BaseTestCheckIndex extends LuceneTestCase {
public void testDeletedDocs(Directory dir) throws IOException {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(2));
for(int i=0;i<19;i++) {
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
doc.add(newField("field", "aaa"+i, customType));
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.commit();
writer.deleteDocuments(new Term("field","aaa5"));
writer.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString(IOUtils.UTF_8));
fail();
}
final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
assertTrue(seg.openReaderPassed);
assertNotNull(seg.diagnostics);
assertNotNull(seg.fieldNormStatus);
assertNull(seg.fieldNormStatus.error);
assertEquals(1, seg.fieldNormStatus.totFields);
assertNotNull(seg.termIndexStatus);
assertNull(seg.termIndexStatus.error);
assertEquals(18, seg.termIndexStatus.termCount);
assertEquals(18, seg.termIndexStatus.totFreq);
assertEquals(18, seg.termIndexStatus.totPos);
assertNotNull(seg.storedFieldStatus);
assertNull(seg.storedFieldStatus.error);
assertEquals(18, seg.storedFieldStatus.docCount);
assertEquals(18, seg.storedFieldStatus.totFields);
assertNotNull(seg.termVectorStatus);
assertNull(seg.termVectorStatus.error);
assertEquals(18, seg.termVectorStatus.docCount);
assertEquals(18, seg.termVectorStatus.totVectors);
assertNotNull(seg.diagnostics.get("java.vm.version"));
assertNotNull(seg.diagnostics.get("java.runtime.version"));
assertTrue(seg.diagnostics.size() > 0);
final List<String> onlySegments = new ArrayList<>();
onlySegments.add("_0");
assertTrue(checker.checkIndex(onlySegments).clean == true);
checker.close();
}
// LUCENE-4221: we have to let these thru, for now
public void testBogusTermVectors(Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(true);
Field field = new Field("foo", "", ft);
field.setTokenStream(new CannedTokenStream(
new Token("bar", 5, 10), new Token("bar", 1, 4)
));
doc.add(field);
iw.addDocument(doc);
iw.close();
}
public void testChecksumsOnly(Directory dir) throws IOException {
LineFileDocs lf = new LineFileDocs(random());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
analyzer.close();
}
public void testChecksumsOnlyVerbose(Directory dir) throws IOException {
LineFileDocs lf = new LineFileDocs(random());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer));
for (int i = 0; i < 100; i++) {
iw.addDocument(lf.nextDoc());
}
iw.addDocument(new Document());
iw.commit();
iw.close();
lf.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos, true, IOUtils.UTF_8));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
assertTrue(indexStatus.clean);
checker.close();
analyzer.close();
}
public void testObtainsLock(Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
iw.addDocument(new Document());
iw.commit();
// keep IW open...
try {
new CheckIndex(dir);
fail("should not have obtained write lock");
} catch (LockObtainFailedException expected) {
// ok
}
iw.close();
}
}
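
BaseTestCheckIndex takes the Directory under test as a parameter rather than creating one, so it can be reused either by inheritance (as TestCheckIndex above) or by composition (as CheckHdfsIndexTest below). A hedged sketch of the composition style against an arbitrary Directory; RAMDirectory and the class name are only illustrative stand-ins:

import java.io.IOException;
import org.apache.lucene.index.BaseTestCheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;

public class MyDirectoryCheckIndexTest extends LuceneTestCase {
  private final BaseTestCheckIndex testCheckIndex = new BaseTestCheckIndex();
  private Directory directory;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = new RAMDirectory(); // swap in the Directory implementation under test
  }

  @Override
  public void tearDown() throws Exception {
    directory.close();
    super.tearDown();
  }

  @Test
  public void testDeletedDocs() throws IOException {
    testCheckIndex.testDeletedDocs(directory);
  }
}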

solr/CHANGES.txt

@@ -182,6 +182,9 @@ Detailed Change List
New Features
----------------------
+ * SOLR-7928: Improve CheckIndex to work against HdfsDirectory
+   (Mike Drob, Gregory Chanan)
Other Changes
----------------------

org/apache/solr/index/hdfs/CheckHdfsIndex.java

@@ -0,0 +1,80 @@
package org.apache.solr.index.hdfs;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.solr.core.HdfsDirectoryFactory;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.apache.solr.util.HdfsUtil;
public class CheckHdfsIndex {
public static void main(String[] args) throws IOException, InterruptedException {
int exitCode = doMain(args);
System.exit(exitCode);
}
// actual main: returns exit code instead of terminating JVM (for easy testing)
@SuppressForbidden(reason = "System.out required: command line tool")
protected static int doMain(String[] args) throws IOException, InterruptedException {
CheckIndex.Options opts;
try {
opts = CheckIndex.parseOptions(args);
} catch (IllegalArgumentException e) {
System.out.println(e.getMessage());
return 1;
}
if (!CheckIndex.assertsOn()) {
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
}
if (opts.getDirImpl() != null) {
System.out.println("\nIgnoring specified -dir-impl, instead using " + HdfsDirectory.class.getSimpleName());
}
System.out.println("\nOpening index @ " + opts.getIndexPath() + "\n");
Directory directory;
try {
directory = new HdfsDirectory(new Path(opts.getIndexPath()), getConf());
} catch (IOException e) {
System.out.println("ERROR: could not open hdfs directory \"" + opts.getIndexPath() + "\"; exiting");
e.printStackTrace(System.out);
return 1;
}
try (Directory dir = directory; CheckIndex checker = new CheckIndex(dir)) {
opts.setOut(System.out);
return checker.doCheck(opts);
}
}
private static Configuration getConf() {
Configuration conf = new Configuration();
String confDir = System.getProperty(HdfsDirectoryFactory.CONFIG_DIRECTORY);
HdfsUtil.addHdfsResources(conf, confDir);
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
return conf;
}
}
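
Given the Solr and Hadoop jars on the classpath, the tool above is run as java org.apache.solr.index.hdfs.CheckHdfsIndex <hdfs-index-path>. The same check can be driven programmatically; a minimal sketch mirroring doMain, where the hdfs:// URI is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.solr.store.hdfs.HdfsDirectory;

public class CheckHdfsIndexSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder path; point at a core's data/index directory in HDFS.
    String indexPath = "hdfs://namenode:8020/solr/collection1/core_node1/data/index";
    CheckIndex.Options opts = CheckIndex.parseOptions(new String[] {indexPath});
    Configuration conf = new Configuration();
    conf.setBoolean("fs.hdfs.impl.disable.cache", true); // as getConf() does above
    int rc;
    try (Directory dir = new HdfsDirectory(new Path(opts.getIndexPath()), conf);
         CheckIndex checker = new CheckIndex(dir)) {
      opts.setOut(System.out);
      rc = checker.doCheck(opts);
    }
    System.exit(rc); // 0 iff the index is clean
  }
}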

org/apache/solr/index/hdfs/package-info.java

@@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An HDFS CheckIndex implementation.
*/
package org.apache.solr.index.hdfs;

org/apache/solr/index/hdfs/CheckHdfsIndexTest.java

@@ -0,0 +1,143 @@
package org.apache.solr.index.hdfs;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.lucene.index.BaseTestCheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
import org.apache.solr.cloud.hdfs.HdfsTestUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.apache.solr.util.BadHdfsThreadsFilter;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
@ThreadLeakFilters(defaultFilters = true, filters = {
BadHdfsThreadsFilter.class // hdfs currently leaks thread(s)
})
public class CheckHdfsIndexTest extends AbstractFullDistribZkTestBase {
private static MiniDFSCluster dfsCluster;
private static Path path;
private BaseTestCheckIndex testCheckIndex;
private Directory directory;
public CheckHdfsIndexTest() {
super();
sliceCount = 1;
fixShardCount(1);
testCheckIndex = new BaseTestCheckIndex();
}
@BeforeClass
public static void setupClass() throws Exception {
dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath());
path = new Path(HdfsTestUtil.getURI(dfsCluster) + "/solr/");
}
@AfterClass
public static void teardownClass() throws Exception {
HdfsTestUtil.teardownClass(dfsCluster);
dfsCluster = null;
}
@Override
@Before
public void setUp() throws Exception {
super.setUp();
Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
conf.setBoolean("fs.hdfs.impl.disable.cache", true);
directory = new HdfsDirectory(path, NoLockFactory.INSTANCE, conf);
}
@Override
@After
public void tearDown() throws Exception {
directory.close();
dfsCluster.getFileSystem().delete(path, true);
super.tearDown();
}
@Override
protected String getDataDir(String dataDir) throws IOException {
return HdfsTestUtil.getDataDir(dfsCluster, dataDir);
}
@Test
public void doTest() throws Exception {
indexr(id, 1);
commit();
waitForRecoveriesToFinish(false);
String[] args;
{
SolrClient client = clients.get(0);
NamedList<Object> response = client.query(new SolrQuery().setRequestHandler("/admin/system")).getResponse();
NamedList<Object> coreInfo = (NamedList<Object>) response.get("core");
String indexDir = (String) ((NamedList<Object>) coreInfo.get("directory")).get("data") + "/index";
args = new String[] {indexDir};
}
assertEquals("CheckHdfsIndex return status", 0, CheckHdfsIndex.doMain(args));
}
@Test
public void testDeletedDocs() throws IOException {
testCheckIndex.testDeletedDocs(directory);
}
@Test
public void testBogusTermVectors() throws IOException {
testCheckIndex.testBogusTermVectors(directory);
}
@Test
public void testChecksumsOnly() throws IOException {
testCheckIndex.testChecksumsOnly(directory);
}
@Test
public void testChecksumsOnlyVerbose() throws IOException {
testCheckIndex.testChecksumsOnlyVerbose(directory);
}
@Test
@Ignore("We explicitly use a NoLockFactory, so this test doesn't make sense.")
public void testObtainsLock() throws IOException {
testCheckIndex.testObtainsLock(directory);
}
}