mirror of https://github.com/apache/lucene.git
LUCENE-5698 - minor adjustments
This commit is contained in:
parent
6df32fbc35
commit
33896dde5a
|
@ -24,7 +24,6 @@ import java.nio.file.DirectoryStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
@ -32,7 +31,6 @@ import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -64,7 +62,6 @@ import org.apache.lucene.search.similarities.LambdaTTF;
|
||||||
import org.apache.lucene.search.similarities.Normalization;
|
import org.apache.lucene.search.similarities.Normalization;
|
||||||
import org.apache.lucene.search.similarities.NormalizationH1;
|
import org.apache.lucene.search.similarities.NormalizationH1;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.NamedThreadFactory;
|
import org.apache.lucene.util.NamedThreadFactory;
|
||||||
|
@ -77,12 +74,10 @@ import org.junit.Test;
|
||||||
@LuceneTestCase.Monster("takes a lot!")
|
@LuceneTestCase.Monster("takes a lot!")
|
||||||
public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
|
|
||||||
private static final String PATH_TO_20N = "/path/to/20n/";
|
|
||||||
private static final String INDEX = PATH_TO_20N + "index";
|
|
||||||
|
|
||||||
private static final String CATEGORY_FIELD = "category";
|
private static final String CATEGORY_FIELD = "category";
|
||||||
private static final String BODY_FIELD = "body";
|
private static final String BODY_FIELD = "body";
|
||||||
private static final String SUBJECT_FIELD = "subject";
|
private static final String SUBJECT_FIELD = "subject";
|
||||||
|
private static final String INDEX_DIR = "/path/to/lucene-solr/lucene/classification/20n";
|
||||||
|
|
||||||
private static boolean index = true;
|
private static boolean index = true;
|
||||||
private static boolean split = true;
|
private static boolean split = true;
|
||||||
|
@ -108,26 +103,15 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Path mainIndexPath = Paths.get(INDEX + "/original");
|
Directory directory = newDirectory();
|
||||||
Directory directory = FSDirectory.open(mainIndexPath);
|
Directory cv = null;
|
||||||
Path trainPath = Paths.get(INDEX + "/train");
|
Directory test = null;
|
||||||
Path testPath = Paths.get(INDEX + "/test");
|
Directory train = null;
|
||||||
Path cvPath = Paths.get(INDEX + "/cv");
|
|
||||||
FSDirectory cv = null;
|
|
||||||
FSDirectory test = null;
|
|
||||||
FSDirectory train = null;
|
|
||||||
IndexReader testReader = null;
|
IndexReader testReader = null;
|
||||||
if (split) {
|
if (split) {
|
||||||
cv = FSDirectory.open(cvPath);
|
cv = newDirectory();
|
||||||
test = FSDirectory.open(testPath);
|
test = newDirectory();
|
||||||
train = FSDirectory.open(trainPath);
|
train = newDirectory();
|
||||||
}
|
|
||||||
|
|
||||||
if (index) {
|
|
||||||
delete(mainIndexPath);
|
|
||||||
if (split) {
|
|
||||||
delete(trainPath, testPath, cvPath);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexReader reader = null;
|
IndexReader reader = null;
|
||||||
|
@ -141,7 +125,8 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
long startIndex = System.currentTimeMillis();
|
long startIndex = System.currentTimeMillis();
|
||||||
IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
|
IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
|
||||||
|
|
||||||
int docsIndexed = buildIndex(Paths.get(PATH_TO_20N).getParent(), indexWriter);
|
Path indexDir = Paths.get(INDEX_DIR);
|
||||||
|
int docsIndexed = buildIndex(indexDir, indexWriter);
|
||||||
|
|
||||||
long endIndex = System.currentTimeMillis();
|
long endIndex = System.currentTimeMillis();
|
||||||
System.out.println("Indexed " + docsIndexed + " docs in " + (endIndex - startIndex) / 1000 + "s");
|
System.out.println("Indexed " + docsIndexed + " docs in " + (endIndex - startIndex) / 1000 + "s");
|
||||||
|
@ -157,7 +142,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (index && split) {
|
if (index && split) {
|
||||||
// split the index
|
|
||||||
System.out.println("Splitting the index...");
|
System.out.println("Splitting the index...");
|
||||||
|
|
||||||
long startSplit = System.currentTimeMillis();
|
long startSplit = System.currentTimeMillis();
|
||||||
|
@ -222,6 +206,9 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
directory.close();
|
directory.close();
|
||||||
|
if (testReader != null) {
|
||||||
|
testReader.close();
|
||||||
|
}
|
||||||
if (test != null) {
|
if (test != null) {
|
||||||
test.close();
|
test.close();
|
||||||
}
|
}
|
||||||
|
@ -231,9 +218,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
if (cv != null) {
|
if (cv != null) {
|
||||||
cv.close();
|
cv.close();
|
||||||
}
|
}
|
||||||
if (testReader != null) {
|
|
||||||
testReader.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Classifier<BytesRef> c : classifiers) {
|
for (Classifier<BytesRef> c : classifiers) {
|
||||||
if (c instanceof Closeable) {
|
if (c instanceof Closeable) {
|
||||||
|
@ -264,19 +248,6 @@ public final class Test20NewsgroupsClassification extends LuceneTestCase {
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void delete(Path... paths) throws IOException {
|
|
||||||
for (Path path : paths) {
|
|
||||||
if (Files.isDirectory(path)) {
|
|
||||||
Stream<Path> pathStream = Files.list(path);
|
|
||||||
Iterator<Path> iterator = pathStream.iterator();
|
|
||||||
while (iterator.hasNext()) {
|
|
||||||
Files.delete(iterator.next());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private int buildIndex(Path indexDir, IndexWriter indexWriter)
|
private int buildIndex(Path indexDir, IndexWriter indexWriter)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
Loading…
Reference in New Issue