LUCENE-5698 - minor adjustments

This commit is contained in:
Tommaso Teofili 2019-01-24 10:24:52 +01:00
parent 6df32fbc35
commit 33896dde5a
1 changed file with 13 additions and 42 deletions

View File

@ -24,7 +24,6 @@ import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
@ -32,7 +31,6 @@ import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.lucene.analysis.Analyzer;
@ -64,7 +62,6 @@ import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
@ -77,12 +74,10 @@ import org.junit.Test;
@LuceneTestCase.Monster("takes a lot!")
public final class Test20NewsgroupsClassification extends LuceneTestCase {
private static final String PATH_TO_20N = "/path/to/20n/";
private static final String INDEX = PATH_TO_20N + "index";
private static final String CATEGORY_FIELD = "category";
private static final String BODY_FIELD = "body";
private static final String SUBJECT_FIELD = "subject";
private static final String INDEX_DIR = "/path/to/lucene-solr/lucene/classification/20n";
private static boolean index = true;
private static boolean split = true;
@ -108,26 +103,15 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
}
}
Path mainIndexPath = Paths.get(INDEX + "/original");
Directory directory = FSDirectory.open(mainIndexPath);
Path trainPath = Paths.get(INDEX + "/train");
Path testPath = Paths.get(INDEX + "/test");
Path cvPath = Paths.get(INDEX + "/cv");
FSDirectory cv = null;
FSDirectory test = null;
FSDirectory train = null;
Directory directory = newDirectory();
Directory cv = null;
Directory test = null;
Directory train = null;
IndexReader testReader = null;
if (split) {
cv = FSDirectory.open(cvPath);
test = FSDirectory.open(testPath);
train = FSDirectory.open(trainPath);
}
if (index) {
delete(mainIndexPath);
if (split) {
delete(trainPath, testPath, cvPath);
}
cv = newDirectory();
test = newDirectory();
train = newDirectory();
}
IndexReader reader = null;
@ -141,7 +125,8 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
long startIndex = System.currentTimeMillis();
IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
int docsIndexed = buildIndex(Paths.get(PATH_TO_20N).getParent(), indexWriter);
Path indexDir = Paths.get(INDEX_DIR);
int docsIndexed = buildIndex(indexDir, indexWriter);
long endIndex = System.currentTimeMillis();
System.out.println("Indexed " + docsIndexed + " docs in " + (endIndex - startIndex) / 1000 + "s");
@ -157,7 +142,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
}
if (index && split) {
// split the index
System.out.println("Splitting the index...");
long startSplit = System.currentTimeMillis();
@ -222,6 +206,9 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
reader.close();
}
directory.close();
if (testReader != null) {
testReader.close();
}
if (test != null) {
test.close();
}
@ -231,9 +218,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
if (cv != null) {
cv.close();
}
if (testReader != null) {
testReader.close();
}
for (Classifier<BytesRef> c : classifiers) {
if (c instanceof Closeable) {
@ -264,19 +248,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
}));
}
/**
 * Deletes the entries directly contained in each of the given directories.
 *
 * <p>Paths that are not directories are skipped. Only the direct children are passed to
 * {@link Files#delete(Path)}; the listing is not recursive and the directories themselves
 * are left in place.
 *
 * @param paths directories whose direct children should be deleted
 * @throws IOException if a directory cannot be listed or one of its entries cannot be deleted
 */
private void delete(Path... paths) throws IOException {
  for (Path path : paths) {
    if (Files.isDirectory(path)) {
      // The stream returned by Files.list keeps the directory open until it is closed,
      // so scope it with try-with-resources (also released if a delete throws).
      try (Stream<Path> pathStream = Files.list(path)) {
        Iterator<Path> iterator = pathStream.iterator();
        while (iterator.hasNext()) {
          Files.delete(iterator.next());
        }
      }
    }
  }
}
private int buildIndex(Path indexDir, IndexWriter indexWriter)
throws IOException {
int i = 0;