LUCENE-6829: OfflineSorter now uses Directory API; add Directory.createTempOutput and IndexOutput.getName

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1708760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-10-15 09:58:18 +00:00
parent 673654321b
commit c855dd5a33
76 changed files with 1365 additions and 988 deletions

View File

@ -59,6 +59,13 @@ API Changes
* LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
* LUCENE-6829: OfflineSorter, and the classes that use it (suggesters,
hunspell) now do all temporary file IO via Directory instead of
directly through java's temp dir. Directory.createTempOutput
creates a uniquely named IndexOutput, and the new
IndexOutput.getName returns its name (Dawid Weiss, Robert Muir, Mike
McCandless)
Changes in Runtime Behavior
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.

View File

@ -17,7 +17,39 @@ package org.apache.lucene.analysis.hunspell;
* limitations under the License.
*/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -39,34 +71,6 @@ import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* In-memory structure for the dictionary (.dic) and affix (.aff)
* data of a hunspell dictionary.
@ -139,7 +143,7 @@ public class Dictionary {
// when set, some words have exceptional stems, and the last entry is a pointer to stemExceptions
boolean hasStemExceptions;
private final Path tempDir = OfflineSorter.getDefaultTempDir(); // TODO: make this configurable?
private final Path tempPath = getDefaultTempDir(); // TODO: make this configurable?
boolean ignoreCase;
boolean complexPrefixes;
@ -167,19 +171,21 @@ public class Dictionary {
String language;
// true if case algorithms should use alternate (Turkish/Azeri) mapping
boolean alternateCasing;
/**
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
* and dictionary files.
* You have to close the provided InputStreams yourself.
*
* @param tempDir Directory to use for offline sorting
* @param tempFileNamePrefix prefix to use to generate temp file names
* @param affix InputStream for reading the hunspell affix file (won't be closed).
* @param dictionary InputStream for reading the hunspell dictionary file (won't be closed).
* @throws IOException Can be thrown while reading from the InputStreams
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
*/
public Dictionary(InputStream affix, InputStream dictionary) throws IOException, ParseException {
this(affix, Collections.singletonList(dictionary), false);
public Dictionary(Directory tempDir, String tempFileNamePrefix, InputStream affix, InputStream dictionary) throws IOException, ParseException {
this(tempDir, tempFileNamePrefix, affix, Collections.singletonList(dictionary), false);
}
/**
@ -187,18 +193,20 @@ public class Dictionary {
* and dictionary files.
* You have to close the provided InputStreams yourself.
*
* @param tempDir Directory to use for offline sorting
* @param tempFileNamePrefix prefix to use to generate temp file names
* @param affix InputStream for reading the hunspell affix file (won't be closed).
* @param dictionaries InputStream for reading the hunspell dictionary files (won't be closed).
* @throws IOException Can be thrown while reading from the InputStreams
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
*/
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
public Dictionary(Directory tempDir, String tempFileNamePrefix, InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
flagLookup.add(new BytesRef()); // no flags -> ord 0
Path aff = Files.createTempFile(tempDir, "affix", "aff");
Path aff = Files.createTempFile(tempPath, "affix", "aff");
OutputStream out = new BufferedOutputStream(Files.newOutputStream(aff));
InputStream aff1 = null;
InputStream aff2 = null;
@ -224,7 +232,7 @@ public class Dictionary {
// read dictionary entries
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
readDictionaryFiles(tempDir, tempFileNamePrefix, dictionaries, decoder, b);
words = b.finish();
aliases = null; // no longer needed
morphAliases = null; // no longer needed
@ -766,7 +774,7 @@ public class Dictionary {
return Math.max(pos1, pos2);
}
}
/**
* Reads the dictionary file through the provided InputStreams, building up the words map
*
@ -774,13 +782,13 @@ public class Dictionary {
* @param decoder CharsetDecoder used to decode the contents of the file
* @throws IOException Can be thrown while reading from the file
*/
private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException {
private void readDictionaryFiles(Directory tempDir, String tempFileNamePrefix, List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException {
BytesRefBuilder flagsScratch = new BytesRefBuilder();
IntsRefBuilder scratchInts = new IntsRefBuilder();
StringBuilder sb = new StringBuilder();
Path unsorted = Files.createTempFile(tempDir, "unsorted", "dat");
IndexOutput unsorted = tempDir.createTempOutput(tempFileNamePrefix, "dat", IOContext.DEFAULT);
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
for (InputStream dictionary : dictionaries) {
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
@ -823,9 +831,8 @@ public class Dictionary {
}
}
}
Path sorted = Files.createTempFile(tempDir, "sorted", "dat");
OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, new Comparator<BytesRef>() {
BytesRef scratch1 = new BytesRef();
BytesRef scratch2 = new BytesRef();
@ -862,21 +869,23 @@ public class Dictionary {
}
}
});
String sorted;
boolean success = false;
try {
sorter.sort(unsorted, sorted);
sorted = sorter.sort(unsorted.getName());
success = true;
} finally {
if (success) {
Files.delete(unsorted);
tempDir.deleteFile(unsorted.getName());
} else {
IOUtils.deleteFilesIgnoringExceptions(unsorted);
IOUtils.deleteFilesIgnoringExceptions(tempDir, unsorted.getName());
}
}
boolean success2 = false;
ByteSequencesReader reader = new ByteSequencesReader(sorted);
try {
try (ByteSequencesReader reader = new ByteSequencesReader(tempDir.openInput(sorted, IOContext.READONCE))) {
BytesRefBuilder scratchLine = new BytesRefBuilder();
// TODO: the flags themselves can be double-chars (long) or also numeric
@ -956,11 +965,10 @@ public class Dictionary {
words.add(scratchInts.get(), currentOrds.get());
success2 = true;
} finally {
IOUtils.closeWhileHandlingException(reader);
if (success2) {
Files.delete(sorted);
tempDir.deleteFile(sorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(sorted);
IOUtils.deleteFilesIgnoringExceptions(tempDir, sorted);
}
}
}
@ -1245,4 +1253,33 @@ public class Dictionary {
public boolean getIgnoreCase() {
return ignoreCase;
}
/** Lazily-initialized default temp dir; read and written under the class lock. */
private static Path DEFAULT_TEMP_DIR;

/** Used by test framework. Synchronized so the write is visible to
 *  {@link #getDefaultTempDir}, which reads the field under the same class lock. */
public synchronized static void setDefaultTempDir(Path tempDir) {
  DEFAULT_TEMP_DIR = tempDir;
}
/**
 * Returns the default temporary directory (java.io.tmpdir by default).
 *
 * @throws IOException if the property is unset or the directory is not writable
 */
synchronized static Path getDefaultTempDir() throws IOException {
  if (DEFAULT_TEMP_DIR == null) {
    // Lazily resolve and validate java.io.tmpdir on first use:
    String tempDirProperty = System.getProperty("java.io.tmpdir");
    if (tempDirProperty == null) {
      throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
    }
    Path candidate = Paths.get(tempDirProperty);
    if (!Files.isWritable(candidate)) {
      throw new IOException("Java's temporary folder not present or writeable?: "
          + candidate.toAbsolutePath());
    }
    DEFAULT_TEMP_DIR = candidate;
  }
  return DEFAULT_TEMP_DIR;
}
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.analysis.hunspell;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
@ -28,6 +30,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;
/**
@ -48,6 +52,7 @@ import org.apache.lucene.util.IOUtils;
public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private static final String PARAM_DICTIONARY = "dictionary";
private static final String PARAM_AFFIX = "affix";
// NOTE: this one is currently unused?:
private static final String PARAM_RECURSION_CAP = "recursionCap";
private static final String PARAM_IGNORE_CASE = "ignoreCase";
private static final String PARAM_LONGEST_ONLY = "longestOnly";
@ -91,7 +96,12 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
}
affix = loader.openResource(affixFile);
this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
Path tempPath = Files.createTempDirectory(Dictionary.getDefaultTempDir(), "Hunspell");
try (Directory tempDir = FSDirectory.open(tempPath)) {
this.dictionary = new Dictionary(tempDir, "hunspell", affix, dictionaries, ignoreCase);
} finally {
IOUtils.rm(tempPath);
}
} catch (ParseException e) {
throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
} finally {

View File

@ -85,6 +85,7 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CharsRef;
@ -435,7 +436,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
try {
return new Dictionary(affixStream, dictStream);
return new Dictionary(new RAMDirectory(), "dictionary", affixStream, dictStream);
} catch (Exception ex) {
Rethrow.rethrow(ex);
return null; // unreachable code

View File

@ -24,6 +24,7 @@ import java.text.ParseException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@ -61,7 +62,7 @@ public abstract class StemmerTestBase extends LuceneTestCase {
}
try {
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
Dictionary dictionary = new Dictionary(new RAMDirectory(), "dictionary", affixStream, Arrays.asList(dictStreams), ignoreCase);
stemmer = new Stemmer(dictionary);
} finally {
IOUtils.closeWhileHandlingException(affixStream);

View File

@ -24,6 +24,8 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.LuceneTestCase;
@ -51,8 +53,11 @@ public class Test64kAffixes extends LuceneTestCase {
dictWriter.write("1\ndrink/2\n");
dictWriter.close();
try (InputStream affStream = Files.newInputStream(affix); InputStream dictStream = Files.newInputStream(dict)) {
Dictionary dictionary = new Dictionary(affStream, dictStream);
try (InputStream affStream = Files.newInputStream(affix); InputStream dictStream = Files.newInputStream(dict); Directory tempDir2 = newDirectory()) {
if (tempDir2 instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) tempDir2).setEnableVirusScanner(false);
}
Dictionary dictionary = new Dictionary(tempDir2, "dictionary", affStream, dictStream);
Stemmer stemmer = new Stemmer(dictionary);
// drinks should still stem to drink
List<CharsRef> stems = stemmer.stem("drinks");

View File

@ -22,7 +22,8 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
@ -165,14 +166,14 @@ public class TestAllDictionaries extends LuceneTestCase {
IOUtils.rm(tmp);
Files.createDirectory(tmp);
try (InputStream in = Files.newInputStream(f)) {
try (InputStream in = Files.newInputStream(f); Directory tempDir = getDirectory()) {
TestUtil.unzip(in, tmp);
Path dicEntry = tmp.resolve(tests[i+1]);
Path affEntry = tmp.resolve(tests[i+2]);
try (InputStream dictionary = Files.newInputStream(dicEntry);
InputStream affix = Files.newInputStream(affEntry)) {
Dictionary dic = new Dictionary(affix, dictionary);
Dictionary dic = new Dictionary(tempDir, "dictionary", affix, dictionary);
System.out.println(tests[i] + "\t" + RamUsageTester.humanSizeOf(dic) + "\t(" +
"words=" + RamUsageTester.humanSizeOf(dic.words) + ", " +
"flags=" + RamUsageTester.humanSizeOf(dic.flagLookup) + ", " +
@ -204,11 +205,20 @@ public class TestAllDictionaries extends LuceneTestCase {
Path affEntry = tmp.resolve(tests[i+2]);
try (InputStream dictionary = Files.newInputStream(dicEntry);
InputStream affix = Files.newInputStream(affEntry)) {
new Dictionary(affix, dictionary);
InputStream affix = Files.newInputStream(affEntry);
Directory tempDir = getDirectory()) {
new Dictionary(tempDir, "dictionary", affix, dictionary);
}
}
}
}
}
/** Returns a fresh test Directory; disables the mock's virus scanner
 *  (which can otherwise block file deletions — presumed rationale, matches
 *  the other hunspell tests in this change). */
private Directory getDirectory() {
  Directory directory = newDirectory();
  if (directory instanceof MockDirectoryWrapper) {
    MockDirectoryWrapper mock = (MockDirectoryWrapper) directory;
    mock.setEnableVirusScanner(false);
  }
  return directory;
}
}

View File

@ -22,12 +22,13 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import org.junit.Ignore;
/**
@ -186,8 +187,12 @@ public class TestAllDictionaries2 extends LuceneTestCase {
Path affEntry = tmp.resolve(tests[i+2]);
try (InputStream dictionary = Files.newInputStream(dicEntry);
InputStream affix = Files.newInputStream(affEntry)) {
Dictionary dic = new Dictionary(affix, dictionary);
InputStream affix = Files.newInputStream(affEntry);
Directory tempDir = newDirectory()) {
if (tempDir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
}
Dictionary dic = new Dictionary(tempDir, "dictionary", affix, dictionary);
System.out.println(tests[i] + "\t" + RamUsageTester.humanSizeOf(dic) + "\t(" +
"words=" + RamUsageTester.humanSizeOf(dic.words) + ", " +
"flags=" + RamUsageTester.humanSizeOf(dic.flagLookup) + ", " +
@ -219,8 +224,12 @@ public class TestAllDictionaries2 extends LuceneTestCase {
Path affEntry = tmp.resolve(tests[i+2]);
try (InputStream dictionary = Files.newInputStream(dicEntry);
InputStream affix = Files.newInputStream(affEntry)) {
new Dictionary(affix, dictionary);
InputStream affix = Files.newInputStream(affEntry);
Directory tempDir = newDirectory()) {
if (tempDir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
}
new Dictionary(tempDir, "dictionary", affix, dictionary);
}
}
}

View File

@ -24,9 +24,10 @@ import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
@ -41,8 +42,9 @@ public class TestDictionary extends LuceneTestCase {
public void testSimpleDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("simple.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(affixStream, dictStream);
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
@ -63,13 +65,15 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Dictionary dictionary = new Dictionary(affixStream, dictStream);
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
@ -80,13 +84,15 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedBeforeSetDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(affixStream, dictStream);
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
@ -97,13 +103,15 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedEmptyAliasDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(affixStream, dictStream);
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
@ -114,15 +122,17 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
// malformed rule causes ParseException
public void testInvalidData() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("broken.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
try {
new Dictionary(affixStream, dictStream);
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
fail("didn't get expected exception");
} catch (ParseException expected) {
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
@ -131,15 +141,17 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
// malformed flags causes ParseException
public void testInvalidFlags() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
try {
new Dictionary(affixStream, dictStream);
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
fail("didn't get expected exception");
} catch (Exception expected) {
assertTrue(expected.getMessage().startsWith("expected only one flag"));
@ -147,6 +159,7 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
tempDir.close();
}
private class CloseCheckInputStream extends FilterInputStream {
@ -170,21 +183,22 @@ public class TestDictionary extends LuceneTestCase {
public void testResourceCleanup() throws Exception {
CloseCheckInputStream affixStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.aff"));
CloseCheckInputStream dictStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.dic"));
Directory tempDir = getDirectory();
new Dictionary(affixStream, dictStream);
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertFalse(affixStream.isClosed());
assertFalse(dictStream.isClosed());
affixStream.close();
dictStream.close();
tempDir.close();
assertTrue(affixStream.isClosed());
assertTrue(dictStream.isClosed());
}
public void testReplacements() throws Exception {
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
@ -244,4 +258,12 @@ public class TestDictionary extends LuceneTestCase {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}
/** Returns a fresh test Directory with the mock virus scanner switched off
 *  (it can otherwise prevent temp-file deletion — presumed rationale). */
private Directory getDirectory() {
  Directory directory = newDirectory();
  if (directory instanceof MockDirectoryWrapper) {
    MockDirectoryWrapper mock = (MockDirectoryWrapper) directory;
    mock.setEnableVirusScanner(false);
  }
  return directory;
}
}

View File

@ -27,10 +27,10 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -43,11 +43,14 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
try {
dictionary = new Dictionary(affixStream, dictStream);
dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
tempDir.close();
}
@AfterClass
@ -107,8 +110,9 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
try {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
d = new Dictionary(tempDir, "dictionary", affixStream, Collections.singletonList(dictStream), true);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
@ -121,5 +125,14 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
};
checkOneTerm(a, "NoChAnGy", "NoChAnGy");
a.close();
tempDir.close();
}
/** Returns a fresh test Directory with the mock virus scanner switched off
 *  (it can otherwise prevent temp-file deletion — presumed rationale). */
private static Directory getDirectory() {
  Directory directory = newDirectory();
  if (directory instanceof MockDirectoryWrapper) {
    MockDirectoryWrapper mock = (MockDirectoryWrapper) directory;
    mock.setEnableVirusScanner(false);
  }
  return directory;
}
}

View File

@ -140,6 +140,9 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
@Override
public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); }
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { throw new UnsupportedOperationException(); }
@Override
public void sync(Collection<String> names) { throw new UnsupportedOperationException(); }

View File

@ -29,12 +29,12 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.IOUtils;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* Class for accessing a compound stream.
@ -172,6 +172,11 @@ final class Lucene50CompoundReader extends Directory {
public IndexOutput createOutput(String name, IOContext context) throws IOException {
throw new UnsupportedOperationException();
}
// A compound-file reader is a read-only view over the .cfs contents;
// it never creates files, temp or otherwise.
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void sync(Collection<String> names) {

View File

@ -32,8 +32,8 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@ -57,10 +57,10 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.LockValidatingDirectoryWrapper;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.RateLimitedIndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.store.LockValidatingDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -2615,7 +2615,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L);
info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
trackingDir.getCreatedFiles().clear();
trackingDir.clearCreatedFiles();
setDiagnostics(info, SOURCE_ADDINDEXES_READERS);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.store;
*/
import java.io.IOException;
import java.util.Random;
/**
* Base implementation for a concrete {@link Directory} that uses a {@link LockFactory} for locking.
@ -31,6 +32,22 @@ public abstract class BaseDirectory extends Directory {
* this Directory instance). */
protected final LockFactory lockFactory;
/** Subclasses can use this to generate temp file name candidates.
 *  Shared across all directory instances; java.util.Random is thread-safe. */
protected static final Random tempFileRandom;
static {
String prop = System.getProperty("tests.seed");
int seed;
if (prop != null) {
// So if there is a test failure that relied on temp file names,
// we remain reproducible based on the test seed:
seed = prop.hashCode();
} else {
// Outside of tests: just vary the seed per JVM start.
seed = (int) System.currentTimeMillis();
}
tempFileRandom = new Random(seed);
}
/** Sole constructor. */
protected BaseDirectory(LockFactory lockFactory) {
super();

View File

@ -17,9 +17,9 @@ package org.apache.lucene.store;
* limitations under the License.
*/
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Closeable;
import java.nio.file.NoSuchFileException;
import java.util.Collection; // for javadocs
@ -70,8 +70,12 @@ public abstract class Directory implements Closeable {
/** Creates a new, empty file in the directory with the given name.
Returns a stream writing this file. */
public abstract IndexOutput createOutput(String name, IOContext context)
throws IOException;
public abstract IndexOutput createOutput(String name, IOContext context) throws IOException;
/** Creates a new, empty file for writing in the directory, with a
* temporary file name derived from prefix and suffix. Use
* {@link IndexOutput#getName} to see what name was used. */
public abstract IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException;
/**
* Ensure that any writes to these files are moved to
@ -120,8 +124,7 @@ public abstract class Directory implements Closeable {
/** Closes the store. */
@Override
public abstract void close()
throws IOException;
public abstract void close() throws IOException;
@Override
public String toString() {

View File

@ -19,11 +19,14 @@ package org.apache.lucene.store;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.nio.channels.ClosedChannelException; // javadoc @link
import java.nio.file.DirectoryStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.channels.ClosedChannelException; // javadoc @link
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -220,11 +223,23 @@ public abstract class FSDirectory extends BaseDirectory {
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
ensureCanWrite(name);
return new FSIndexOutput(name);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
ensureOpen();
// Keep drawing random names until the create succeeds.  CREATE_NEW makes the
// creation fail atomically if the file already exists, so two concurrent
// callers can never be handed the same temp file.
while (true) {
String name = prefix + tempFileRandom.nextInt(Integer.MAX_VALUE) + "." + suffix;
try {
return new FSIndexOutput(name, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW);
} catch (FileAlreadyExistsException faee) {
// Retry with next random name
}
}
}
protected void ensureCanWrite(String name) throws IOException {
Files.deleteIfExists(directory.resolve(name)); // delete existing, if any
}
@ -273,7 +288,11 @@ public abstract class FSDirectory extends BaseDirectory {
static final int CHUNK_SIZE = 8192;
public FSIndexOutput(String name) throws IOException {
super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", new FilterOutputStream(Files.newOutputStream(directory.resolve(name))) {
this(name, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
}
FSIndexOutput(String name, OpenOption... options) throws IOException {
super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", name, new FilterOutputStream(Files.newOutputStream(directory.resolve(name), options)) {
// This implementation ensures, that we never write more than CHUNK_SIZE bytes:
@Override
public void write(byte[] b, int offset, int length) throws IOException {

View File

@ -22,13 +22,12 @@ import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import org.apache.lucene.util.IOUtils;
/**
* Expert: A Directory instance that switches files between
* two other Directory instances.
@ -155,6 +154,11 @@ public class FileSwitchDirectory extends Directory {
return getDirectory(name).createOutput(name, context);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
// Route by the temp file's suffix, the same way regular files are routed by
// extension ("." + suffix mimics a file name ending in that extension).
return getDirectory("."+suffix).createTempOutput(prefix, suffix, context);
}
@Override
public void sync(Collection<String> names) throws IOException {
List<String> primaryNames = new ArrayList<>();

View File

@ -73,6 +73,11 @@ public class FilterDirectory extends Directory {
return in.createOutput(name, context);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
// Plain delegation to the wrapped Directory; subclasses override to add behavior.
return in.createTempOutput(prefix, suffix, context);
}
@Override
public void sync(Collection<String> names) throws IOException {
in.sync(names);

View File

@ -31,15 +31,27 @@ import java.io.IOException;
*/
public abstract class IndexOutput extends DataOutput implements Closeable {
/** Full description of this output, e.g. which class such as {@code FSIndexOutput}, and the full path to the file */
private final String resourceDescription;
/** Just the name part from {@code resourceDescription} */
private final String name;
/** Sole constructor.  resourceDescription should be a non-null, opaque string
 *  describing this resource; it's returned from {@link #toString}.
 *
 *  @param resourceDescription full description of this output (e.g. class and path); must not be null
 *  @param name the file name this output was created with; returned by {@link #getName} */
protected IndexOutput(String resourceDescription, String name) {
  if (resourceDescription == null) {
    throw new IllegalArgumentException("resourceDescription must not be null");
  }
  this.resourceDescription = resourceDescription;
  this.name = name;
}

/** Returns the name used to create this {@code IndexOutput}.  This is especially useful when using
 *  {@link Directory#createTempOutput}. */
// TODO: can we somehow use this as the default resource description or something?
public String getName() {
  return name;
}
/** Closes this stream to further operations. */

View File

@ -37,8 +37,8 @@ public class OutputStreamIndexOutput extends IndexOutput {
* @param bufferSize the buffer size in bytes used to buffer writes internally.
* @throws IllegalArgumentException if the given buffer size is less or equal to <tt>0</tt>
*/
public OutputStreamIndexOutput(String resourceDescription, String name, OutputStream out, int bufferSize) {
  super(resourceDescription, name);
  // Wrap in CheckedOutputStream so every byte written updates the crc, then buffer writes.
  this.os = new BufferedOutputStream(new CheckedOutputStream(out, crc), bufferSize);
}

View File

@ -17,8 +17,8 @@ package org.apache.lucene.store;
* limitations under the License.
*/
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
@ -26,12 +26,12 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
/**
* A memory-resident {@link Directory} implementation. Locking
* implementation is by default the {@link SingleInstanceLockFactory}.
@ -111,10 +111,7 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
// and do not synchronize or anything stronger. it's great for testing!
// NOTE: fileMap.keySet().toArray(new String[0]) is broken in non Sun JDKs,
// and the code below is resilient to map changes during the array population.
Set<String> fileNames = fileMap.keySet();
List<String> names = new ArrayList<>(fileNames.size());
for (String name : fileNames) names.add(name);
return names.toArray(new String[names.size()]);
return fileMap.keySet().toArray(new String[fileMap.size()]);
}
public final boolean fileNameExists(String name) {
@ -150,9 +147,6 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
return Accountables.namedAccountables("file", fileMap);
}
/** Removes an existing file in the directory.
* @throws IOException if the file does not exist
*/
@Override
public void deleteFile(String name) throws IOException {
ensureOpen();
@ -165,7 +159,6 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
}
}
/** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
@ -179,6 +172,22 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
return new RAMOutputStream(name, file, true);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
ensureOpen();
// Make the file first...
RAMFile file = newRAMFile();
// ... then try to find a unique name for it: putIfAbsent only succeeds when the
// name is unclaimed (fileMap is a ConcurrentHashMap per the imports above, so
// this claim is atomic across threads); otherwise draw another random name.
while (true) {
String name = prefix + tempFileRandom.nextInt(Integer.MAX_VALUE) + "." + suffix;
if (fileMap.putIfAbsent(name, file) == null) {
return new RAMOutputStream(name, file, true);
}
}
}
/**
* Returns a new {@link RAMFile} for storing data. This method can be
* overridden to return different {@link RAMFile} impls, that e.g. override

View File

@ -57,7 +57,7 @@ public class RAMOutputStream extends IndexOutput implements Accountable {
/** Creates this, with specified name. */
public RAMOutputStream(String name, RAMFile f, boolean checksum) {
super("RAMOutputStream(name=\"" + name + "\")");
super("RAMOutputStream(name=\"" + name + "\")", name);
file = f;
// make sure that we switch to the

View File

@ -38,7 +38,7 @@ public final class RateLimitedIndexOutput extends IndexOutput {
private long currentMinPauseCheckBytes;
public RateLimitedIndexOutput(final RateLimiter rateLimiter, final IndexOutput delegate) {
super("RateLimitedIndexOutput(" + delegate + ")");
super("RateLimitedIndexOutput(" + delegate + ")", delegate.getName());
this.delegate = delegate;
this.rateLimiter = rateLimiter;
this.currentMinPauseCheckBytes = rateLimiter.getMinPauseCheckBytes();

View File

@ -45,6 +45,14 @@ public final class TrackingDirectoryWrapper extends FilterDirectory {
return output;
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
  // Record the generated temp file's name so callers can see every file we created.
  final IndexOutput out = in.createTempOutput(prefix, suffix, context);
  createdFileNames.add(out.getName());
  return out;
}
@Override
public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException {
in.copyFrom(from, src, dest, context);
@ -60,10 +68,12 @@ public final class TrackingDirectoryWrapper extends FilterDirectory {
}
}
/** NOTE: returns a copy of the created files. */
public Set<String> getCreatedFiles() {
  // Defensive copy: callers may iterate or mutate the returned set while this
  // wrapper keeps tracking newly created files.
  return new HashSet<>(createdFileNames);
}
/** Forgets all file names tracked so far; does not delete the files themselves. */
public void clearCreatedFiles() {
createdFileNames.clear();
}
}

View File

@ -190,7 +190,7 @@ public final class IOUtils {
* <p>
* Note that the files should not be null.
*/
public static void deleteFilesIgnoringExceptions(Directory dir, String... files) {
public static void deleteFilesIgnoringExceptions(Directory dir, Collection<String> files) {
for (String name : files) {
try {
dir.deleteFile(name);
@ -199,6 +199,42 @@ public final class IOUtils {
}
}
}
public static void deleteFilesIgnoringExceptions(Directory dir, String... files) {
deleteFilesIgnoringExceptions(dir, Arrays.asList(files));
}
/**
* Deletes all given file names. Some of the
* file names may be null; they are
* ignored. After everything is deleted, the method either
* throws the first exception it hit while deleting, or
* completes normally if there were no exceptions.
*
* @param dir Directory to delete files from
* @param files file names to delete
*/
public static void deleteFiles(Directory dir, Collection<String> files) throws IOException {
// The first failure becomes the primary exception; subsequent failures are
// attached to it as suppressed exceptions.  addSuppressed is called before th
// is assigned, so it is presumably a null-tolerant helper (no-op while th is
// still null) -- confirm against the addSuppressed method defined elsewhere.
Throwable th = null;
for (String name : files) {
if (name != null) {
try {
dir.deleteFile(name);
} catch (Throwable t) {
addSuppressed(th, t);
if (th == null) {
th = t;
}
}
}
}
// Every deletion has been attempted; now rethrow the first failure, if any.
reThrow(th);
}
/** Varargs convenience overload of {@link #deleteFiles(Directory, Collection)}. */
public static void deleteFiles(Directory dir, String... files) throws IOException {
deleteFiles(dir, Arrays.asList(files));
}
/**
* Deletes all given files, suppressing all thrown IOExceptions.

View File

@ -17,24 +17,20 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
/**
* On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
* fields:
@ -43,14 +39,12 @@ import java.util.Locale;
* <li>exactly the above count of bytes for the sequence to be sorted.
* </ul>
*
* @see #sort(Path, Path)
* @see #sort(String)
* @lucene.experimental
* @lucene.internal
*/
public final class OfflineSorter {
private static Path DEFAULT_TEMP_DIR;
/** Convenience constant for megabytes */
public final static long MB = 1024 * 1024;
/** Convenience constant for gigabytes */
@ -72,6 +66,10 @@ public final class OfflineSorter {
*/
public final static int MAX_TEMPFILES = 128;
private final Directory dir;
private final String tempFileNamePrefix;
/**
* A bit more descriptive unit for constructors.
*
@ -142,7 +140,7 @@ public final class OfflineSorter {
/** number of partition merges */
public int mergeRounds;
/** number of lines of data read */
public int lines;
public int lineCount;
/** time spent merging sorted partitions (in milliseconds) */
public long mergeTime;
/** time spent sorting data (in milliseconds) */
@ -162,17 +160,16 @@ public final class OfflineSorter {
return String.format(Locale.ROOT,
"time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB",
totalTime / 1000.0d, readTime / 1000.0d, sortTime / 1000.0d, mergeTime / 1000.0d,
lines, tempMergeFiles, mergeRounds,
lineCount, tempMergeFiles, mergeRounds,
(double) bufferSize / MB);
}
}
private final BufferSize ramBufferSize;
private final Path tempDirectory;
private final Counter bufferBytesUsed = Counter.newCounter();
private final BytesRefArray buffer = new BytesRefArray(bufferBytesUsed);
private SortInfo sortInfo;
SortInfo sortInfo;
private int maxTempFiles;
private final Comparator<BytesRef> comparator;
@ -182,27 +179,25 @@ public final class OfflineSorter {
/**
* Defaults constructor.
*
* @see #getDefaultTempDir()
* @see BufferSize#automatic()
*/
public OfflineSorter() throws IOException {
this(DEFAULT_COMPARATOR, BufferSize.automatic(), getDefaultTempDir(), MAX_TEMPFILES);
public OfflineSorter(Directory dir, String tempFileNamePrefix) throws IOException {
this(dir, tempFileNamePrefix, DEFAULT_COMPARATOR, BufferSize.automatic(), MAX_TEMPFILES);
}
/**
* Defaults constructor with a custom comparator.
*
* @see #getDefaultTempDir()
* @see BufferSize#automatic()
*/
public OfflineSorter(Comparator<BytesRef> comparator) throws IOException {
this(comparator, BufferSize.automatic(), getDefaultTempDir(), MAX_TEMPFILES);
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator) throws IOException {
this(dir, tempFileNamePrefix, comparator, BufferSize.automatic(), MAX_TEMPFILES);
}
/**
* All-details constructor.
*/
public OfflineSorter(Comparator<BytesRef> comparator, BufferSize ramBufferSize, Path tempDirectory, int maxTempfiles) {
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator, BufferSize ramBufferSize, int maxTempfiles) {
if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
}
@ -212,160 +207,129 @@ public final class OfflineSorter {
}
this.ramBufferSize = ramBufferSize;
this.tempDirectory = tempDirectory;
this.maxTempFiles = maxTempfiles;
this.comparator = comparator;
this.dir = dir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
/** Returns the {@link Directory} we use to create temp files. */
public Directory getDirectory() {
return dir;
}
/** Returns the temp file name prefix passed to {@link Directory#createTempOutput} to generate temporary files. */
public String getTempFileNamePrefix() {
return tempFileNamePrefix;
}
/**
* Sort input to output, explicit hint for the buffer size. The amount of allocated
* memory may deviate from the hint (may be smaller or larger).
* Sort input to a new temp file, returning its name.
*/
public SortInfo sort(Path input, Path output) throws IOException {
public String sort(String inputFileName) throws IOException {
sortInfo = new SortInfo();
sortInfo.totalTime = System.currentTimeMillis();
// NOTE: don't remove output here: its existence (often created by the caller
// up above using Files.createTempFile) prevents another concurrent caller
// of this API (from a different thread) from incorrectly re-using this file name
List<String> segments = new ArrayList<>();
ArrayList<Path> merges = new ArrayList<>();
boolean success3 = false;
try {
ByteSequencesReader is = new ByteSequencesReader(input);
boolean success = false;
try {
int lines = 0;
while ((lines = readPartition(is)) > 0) {
merges.add(sortPartition(lines));
sortInfo.tempMergeFiles++;
sortInfo.lines += lines;
// So we can remove any partially written temp files on exception:
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
// Handle intermediate merges.
if (merges.size() == maxTempFiles) {
Path intermediate = Files.createTempFile(tempDirectory, "sort", "intermediate");
boolean success2 = false;
try {
mergePartitions(merges, intermediate);
success2 = true;
} finally {
if (success2) {
IOUtils.deleteFilesIfExist(merges);
} else {
IOUtils.deleteFilesIgnoringExceptions(merges);
}
merges.clear();
merges.add(intermediate);
}
sortInfo.tempMergeFiles++;
}
}
success = true;
} finally {
if (success) {
IOUtils.close(is);
} else {
IOUtils.closeWhileHandlingException(is);
boolean success = false;
try (ByteSequencesReader is = new ByteSequencesReader(dir.openInput(inputFileName, IOContext.READONCE))) {
int lineCount;
while ((lineCount = readPartition(is)) > 0) {
segments.add(sortPartition(trackingDir));
sortInfo.tempMergeFiles++;
sortInfo.lineCount += lineCount;
// Handle intermediate merges.
if (segments.size() == maxTempFiles) {
mergePartitions(trackingDir, segments);
}
}
// One partition, try to rename or copy if unsuccessful.
if (merges.size() == 1) {
Files.move(merges.get(0), output, StandardCopyOption.REPLACE_EXISTING);
} else {
// otherwise merge the partitions with a priority queue.
mergePartitions(merges, output);
// Merge the partitions to the output file with a priority queue.
if (segments.size() > 1) {
mergePartitions(trackingDir, segments);
}
success3 = true;
} finally {
if (success3) {
IOUtils.deleteFilesIfExist(merges);
String result;
if (segments.isEmpty()) {
try (IndexOutput out = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
result = out.getName();
}
} else {
IOUtils.deleteFilesIgnoringExceptions(merges);
IOUtils.deleteFilesIgnoringExceptions(output);
result = segments.get(0);
}
// We should be explicitly removing all intermediate files ourselves unless there is an exception:
assert trackingDir.getCreatedFiles().size() == 1 && trackingDir.getCreatedFiles().contains(result);
sortInfo.totalTime = (System.currentTimeMillis() - sortInfo.totalTime);
success = true;
return result;
} finally {
if (success == false) {
IOUtils.deleteFilesIgnoringExceptions(trackingDir, trackingDir.getCreatedFiles());
}
}
sortInfo.totalTime = (System.currentTimeMillis() - sortInfo.totalTime);
return sortInfo;
}
/** Used by test framework */
static void setDefaultTempDir(Path tempDir) {
DEFAULT_TEMP_DIR = tempDir;
}
/**
* Returns the default temporary directory. By default, java.io.tmpdir. If not accessible
* or not available, an IOException is thrown
*/
public synchronized static Path getDefaultTempDir() throws IOException {
if (DEFAULT_TEMP_DIR == null) {
// Lazy init
String tempDirPath = System.getProperty("java.io.tmpdir");
if (tempDirPath == null) {
throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
}
Path tempDirectory = Paths.get(tempDirPath);
if (Files.isWritable(tempDirectory) == false) {
throw new IOException("Java's temporary folder not present or writeable?: "
+ tempDirectory.toAbsolutePath());
}
DEFAULT_TEMP_DIR = tempDirectory;
}
return DEFAULT_TEMP_DIR;
}
/** Sort a single partition in-memory. */
protected Path sortPartition(int len) throws IOException {
protected String sortPartition(TrackingDirectoryWrapper trackingDir) throws IOException {
BytesRefArray data = this.buffer;
Path tempFile = Files.createTempFile(tempDirectory, "sort", "partition");
long start = System.currentTimeMillis();
sortInfo.sortTime += (System.currentTimeMillis() - start);
final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
BytesRef spare;
try {
try (IndexOutput tempFile = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
BytesRef spare;
long start = System.currentTimeMillis();
BytesRefIterator iter = buffer.iterator(comparator);
while((spare = iter.next()) != null) {
sortInfo.sortTime += (System.currentTimeMillis() - start);
while ((spare = iter.next()) != null) {
assert spare.length <= Short.MAX_VALUE;
out.write(spare);
}
out.close();
// Clean up the buffer for the next partition.
data.clear();
return tempFile;
} finally {
IOUtils.close(out);
return tempFile.getName();
}
}
/** Merge a list of sorted temporary files (partitions) into an output file */
void mergePartitions(List<Path> merges, Path outputFile) throws IOException {
/** Merge a list of sorted temporary files (partitions) into a new temporary file, then
 * replaces the contents of {@code segments} with the single merged file's name. */
void mergePartitions(Directory trackingDir, List<String> segments) throws IOException {
long start = System.currentTimeMillis();
ByteSequencesWriter out = new ByteSequencesWriter(outputFile);
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(segments.size()) {
@Override
protected boolean lessThan(FileAndTop a, FileAndTop b) {
return comparator.compare(a.current.get(), b.current.get()) < 0;
}
};
ByteSequencesReader [] streams = new ByteSequencesReader [merges.size()];
try {
ByteSequencesReader[] streams = new ByteSequencesReader[segments.size()];
String newSegmentName = null;
try (IndexOutput out = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
newSegmentName = out.getName();
ByteSequencesWriter writer = new ByteSequencesWriter(out);
// Open streams and read the top for each file
for (int i = 0; i < merges.size(); i++) {
streams[i] = new ByteSequencesReader(merges.get(i));
byte line[] = streams[i].read();
if (line != null) {
queue.insertWithOverflow(new FileAndTop(i, line));
}
for (int i = 0; i < segments.size(); i++) {
streams[i] = new ByteSequencesReader(dir.openInput(segments.get(i), IOContext.READONCE));
byte[] line = streams[i].read();
assert line != null;
queue.insertWithOverflow(new FileAndTop(i, line));
}
// Unix utility sort() uses ordered array of files to pick the next line from, updating
@ -374,7 +338,7 @@ public final class OfflineSorter {
// so it shouldn't make much of a difference (didn't check).
FileAndTop top;
while ((top = queue.top()) != null) {
out.write(top.current.bytes(), 0, top.current.length());
writer.write(top.current.bytes(), 0, top.current.length());
if (!streams[top.fd].read(top.current)) {
queue.pop();
} else {
@ -385,14 +349,15 @@ public final class OfflineSorter {
sortInfo.mergeTime += System.currentTimeMillis() - start;
sortInfo.mergeRounds++;
} finally {
// The logic below is: if an exception occurs in closing out, it has a priority over exceptions
// happening in closing streams.
try {
IOUtils.close(streams);
} finally {
IOUtils.close(out);
}
IOUtils.close(streams);
}
IOUtils.deleteFiles(trackingDir, segments);
segments.clear();
segments.add(newSegmentName);
sortInfo.tempMergeFiles++;
}
/** Read in a single partition of data */
@ -428,18 +393,11 @@ public final class OfflineSorter {
* Complementary to {@link ByteSequencesReader}.
*/
public static class ByteSequencesWriter implements Closeable {
private final DataOutput os;
/** Constructs a ByteSequencesWriter to the provided Path */
public ByteSequencesWriter(Path path) throws IOException {
this(new DataOutputStream(
new BufferedOutputStream(
Files.newOutputStream(path))));
}
private final IndexOutput out;
/** Constructs a ByteSequencesWriter to the provided DataOutput */
public ByteSequencesWriter(DataOutput os) {
this.os = os;
public ByteSequencesWriter(IndexOutput out) {
this.out = out;
}
/**
@ -455,7 +413,7 @@ public final class OfflineSorter {
* Writes a byte array.
* @see #write(byte[], int, int)
*/
public void write(byte [] bytes) throws IOException {
public void write(byte[] bytes) throws IOException {
write(bytes, 0, bytes.length);
}
@ -465,25 +423,23 @@ public final class OfflineSorter {
* The length is written as a <code>short</code>, followed
* by the bytes.
*/
public void write(byte [] bytes, int off, int len) throws IOException {
public void write(byte[] bytes, int off, int len) throws IOException {
assert bytes != null;
assert off >= 0 && off + len <= bytes.length;
assert len >= 0;
if (len > Short.MAX_VALUE) {
throw new IllegalArgumentException("len must be <= " + Short.MAX_VALUE + "; got " + len);
}
os.writeShort(len);
os.write(bytes, off, len);
out.writeShort((short) len);
out.writeBytes(bytes, off, len);
}
/**
* Closes the provided {@link DataOutput} if it is {@link Closeable}.
* Closes the provided {@link IndexOutput}.
*/
@Override
public void close() throws IOException {
if (os instanceof Closeable) {
((Closeable) os).close();
}
out.close();
}
}
@ -492,18 +448,11 @@ public final class OfflineSorter {
* Complementary to {@link ByteSequencesWriter}.
*/
public static class ByteSequencesReader implements Closeable {
private final DataInput is;
private final IndexInput in;
/** Constructs a ByteSequencesReader from the provided Path */
public ByteSequencesReader(Path path) throws IOException {
this(new DataInputStream(
new BufferedInputStream(
Files.newInputStream(path))));
}
/** Constructs a ByteSequencesReader from the provided DataInput */
public ByteSequencesReader(DataInput is) {
this.is = is;
/** Constructs a ByteSequencesReader from the provided IndexInput */
public ByteSequencesReader(IndexInput in) {
this.in = in;
}
/**
@ -517,14 +466,14 @@ public final class OfflineSorter {
public boolean read(BytesRefBuilder ref) throws IOException {
short length;
try {
length = is.readShort();
length = in.readShort();
} catch (EOFException e) {
return false;
}
ref.grow(length);
ref.setLength(length);
is.readFully(ref.bytes(), 0, length);
in.readBytes(ref.bytes(), 0, length);
return true;
}
@ -540,25 +489,23 @@ public final class OfflineSorter {
public byte[] read() throws IOException {
short length;
try {
length = is.readShort();
length = in.readShort();
} catch (EOFException e) {
return null;
}
assert length >= 0 : "Sanity: sequence length < 0: " + length;
byte [] result = new byte [length];
is.readFully(result);
byte[] result = new byte[length];
in.readBytes(result, 0, length);
return result;
}
/**
* Closes the provided {@link DataInput} if it is {@link Closeable}.
* Closes the provided {@link IndexInput}.
*/
@Override
public void close() throws IOException {
if (is instanceof Closeable) {
((Closeable) is).close();
}
in.close();
}
}

View File

@ -283,7 +283,7 @@ public class TestCodecUtil extends LuceneTestCase {
final IndexOutput output = new RAMOutputStream(file, false);
AtomicLong fakeChecksum = new AtomicLong();
// wrap the index input where we control the checksum for mocking
IndexOutput fakeOutput = new IndexOutput("fake") {
IndexOutput fakeOutput = new IndexOutput("fake", "fake") {
@Override
public void close() throws IOException {
output.close();

View File

@ -17,20 +17,22 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.OfflineSorter.BufferSize;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.OfflineSorter.SortInfo;
import org.apache.lucene.util.OfflineSorter;
/**
* Tests for on-disk merge sorting.
@ -52,35 +54,53 @@ public class TestOfflineSorter extends LuceneTestCase {
super.tearDown();
}
/** Returns a new test Directory, disabling MockDirectoryWrapper's simulated virus scanner when present. */
private static Directory newDirectoryNoVirusScanner() {
  final Directory result = newDirectory();
  if (result instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) result).setEnableVirusScanner(false);
  }
  return result;
}
public void testEmpty() throws Exception {
checkSort(new OfflineSorter(), new byte [][] {});
try (Directory dir = newDirectoryNoVirusScanner()) {
checkSort(dir, new OfflineSorter(dir, "foo"), new byte [][] {});
}
}
public void testSingleLine() throws Exception {
checkSort(new OfflineSorter(), new byte [][] {
"Single line only.".getBytes(StandardCharsets.UTF_8)
});
try (Directory dir = newDirectoryNoVirusScanner()) {
checkSort(dir, new OfflineSorter(dir, "foo"), new byte [][] {
"Single line only.".getBytes(StandardCharsets.UTF_8)
});
}
}
public void testIntermediateMerges() throws Exception {
// Sort 20 mb worth of data with 1mb buffer, binary merging.
SortInfo info = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.getDefaultTempDir(), 2),
generateRandom((int)OfflineSorter.MB * 20));
assertTrue(info.mergeRounds > 10);
try (Directory dir = newDirectoryNoVirusScanner()) {
SortInfo info = checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), 2),
generateRandom((int)OfflineSorter.MB * 20));
assertTrue(info.mergeRounds > 10);
}
}
public void testSmallRandom() throws Exception {
// Sort 20 mb worth of data with 1mb buffer.
SortInfo sortInfo = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.getDefaultTempDir(), OfflineSorter.MAX_TEMPFILES),
generateRandom((int)OfflineSorter.MB * 20));
assertEquals(1, sortInfo.mergeRounds);
try (Directory dir = newDirectoryNoVirusScanner()) {
SortInfo sortInfo = checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.MAX_TEMPFILES),
generateRandom((int)OfflineSorter.MB * 20));
assertEquals(1, sortInfo.mergeRounds);
}
}
@Nightly
public void testLargerRandom() throws Exception {
// Sort 100MB worth of data with 15mb buffer.
checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(16), OfflineSorter.getDefaultTempDir(), OfflineSorter.MAX_TEMPFILES),
generateRandom((int)OfflineSorter.MB * 100));
try (Directory dir = newDirectoryNoVirusScanner()) {
checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(16), OfflineSorter.MAX_TEMPFILES),
generateRandom((int)OfflineSorter.MB * 100));
}
}
private byte[][] generateRandom(int howMuchDataInBytes) {
@ -101,8 +121,9 @@ public class TestOfflineSorter extends LuceneTestCase {
final int max = Math.min(left.length, right.length);
for (int i = 0, j = 0; i < max; i++, j++) {
int diff = (left[i] & 0xff) - (right[j] & 0xff);
if (diff != 0)
if (diff != 0) {
return diff;
}
}
return left.length - right.length;
}
@ -111,54 +132,56 @@ public class TestOfflineSorter extends LuceneTestCase {
/**
* Check sorting data on an instance of {@link OfflineSorter}.
*/
private SortInfo checkSort(OfflineSorter sort, byte[][] data) throws IOException {
Path unsorted = writeAll("unsorted", data);
private SortInfo checkSort(Directory dir, OfflineSorter sorter, byte[][] data) throws IOException {
IndexOutput unsorted = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
writeAll(unsorted, data);
IndexOutput golden = dir.createTempOutput("golden", "tmp", IOContext.DEFAULT);
Arrays.sort(data, unsignedByteOrderComparator);
Path golden = writeAll("golden", data);
writeAll(golden, data);
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
SortInfo sortInfo;
try {
sortInfo = sort.sort(unsorted, sorted);
//System.out.println("Input size [MB]: " + unsorted.length() / (1024 * 1024));
//System.out.println(sortInfo);
assertFilesIdentical(golden, sorted);
} finally {
IOUtils.rm(unsorted, golden, sorted);
}
String sorted = sorter.sort(unsorted.getName());
//System.out.println("Input size [MB]: " + unsorted.length() / (1024 * 1024));
//System.out.println(sortInfo);
assertFilesIdentical(dir, golden.getName(), sorted);
return sortInfo;
return sorter.sortInfo;
}
/**
* Make sure two files are byte-byte identical.
*/
private void assertFilesIdentical(Path golden, Path sorted) throws IOException {
assertEquals(Files.size(golden), Files.size(sorted));
private void assertFilesIdentical(Directory dir, String golden, String sorted) throws IOException {
long numBytes = dir.fileLength(golden);
assertEquals(numBytes, dir.fileLength(sorted));
byte [] buf1 = new byte [64 * 1024];
byte [] buf2 = new byte [64 * 1024];
int len;
DataInputStream is1 = new DataInputStream(Files.newInputStream(golden));
DataInputStream is2 = new DataInputStream(Files.newInputStream(sorted));
while ((len = is1.read(buf1)) > 0) {
is2.readFully(buf2, 0, len);
for (int i = 0; i < len; i++) {
assertEquals(buf1[i], buf2[i]);
byte[] buf1 = new byte[64 * 1024];
byte[] buf2 = new byte[64 * 1024];
try (
IndexInput in1 = dir.openInput(golden, IOContext.READONCE);
IndexInput in2 = dir.openInput(sorted, IOContext.READONCE)
) {
long left = numBytes;
while (left > 0) {
int chunk = (int) Math.min(buf1.length, left);
left -= chunk;
in1.readBytes(buf1, 0, chunk);
in2.readBytes(buf2, 0, chunk);
for (int i = 0; i < chunk; i++) {
assertEquals(buf1[i], buf2[i]);
}
}
}
IOUtils.close(is1, is2);
}
private Path writeAll(String name, byte[][] data) throws IOException {
Path file = Files.createTempFile(tempDir, name, "");
ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
for (byte [] datum : data) {
w.write(datum);
/** NOTE: closes the provided {@link IndexOutput} */
private void writeAll(IndexOutput out, byte[][] data) throws IOException {
try (ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(out)) {
for (byte [] datum : data) {
w.write(datum);
}
}
w.close();
return file;
}
public void testRamBuffer() {
@ -192,25 +215,27 @@ public class TestOfflineSorter extends LuceneTestCase {
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 10)];
final AtomicBoolean failed = new AtomicBoolean();
final int iters = atLeast(1000);
for(int i=0;i<threads.length;i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
for(int iter=0;iter<iters && failed.get() == false;iter++) {
checkSort(new OfflineSorter(), generateRandom(1024));
try (Directory dir = newDirectoryNoVirusScanner()) {
for(int i=0;i<threads.length;i++) {
final int threadID = i;
threads[i] = new Thread() {
@Override
public void run() {
try {
for(int iter=0;iter<iters && failed.get() == false;iter++) {
checkSort(dir, new OfflineSorter(dir, "foo_" + threadID + "_" + iter), generateRandom(1024));
}
} catch (Throwable th) {
failed.set(true);
throw new RuntimeException(th);
}
} catch (Throwable th) {
failed.set(true);
throw new RuntimeException(th);
}
}
};
threads[i].start();
}
for(Thread thread : threads) {
thread.join();
};
threads[i].start();
}
for(Thread thread : threads) {
thread.join();
}
}
assertFalse(failed.get());

View File

@ -143,7 +143,7 @@ public class SlowRAMDirectory extends RAMDirectory {
private final Random rand;
public SlowIndexOutput(IndexOutput io) {
super("SlowIndexOutput(" + io + ")");
super("SlowIndexOutput(" + io + ")", io.getName());
this.io = io;
this.rand = forkRandom();
}

View File

@ -18,10 +18,10 @@ package org.apache.lucene.store;
*/
import java.io.EOFException;
import java.io.IOException;
import java.io.FileInputStream;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
@ -149,7 +149,7 @@ public class NativeUnixDirectory extends FSDirectory {
return delegate.createOutput(name, context);
} else {
ensureCanWrite(name);
return new NativeUnixIndexOutput(getDirectory().resolve(name), mergeBufferSize);
return new NativeUnixIndexOutput(getDirectory().resolve(name), name, mergeBufferSize);
}
}
@ -167,8 +167,8 @@ public class NativeUnixDirectory extends FSDirectory {
private long fileLength;
private boolean isOpen;
public NativeUnixIndexOutput(Path path, int bufferSize) throws IOException {
super("NativeUnixIndexOutput(path=\"" + path.toString() + "\")");
public NativeUnixIndexOutput(Path path, String name, int bufferSize) throws IOException {
super("NativeUnixIndexOutput(path=\"" + path.toString() + "\")", name);
//this.path = path;
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
fos = new FileOutputStream(fd);

View File

@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -39,9 +40,13 @@ class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
final IndexOutput out;
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
final SegmentWriteState state;
final Directory tempDir;
final String tempFileNamePrefix;
public BKDTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
public BKDTreeDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.delegate = delegate;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
@ -91,7 +96,7 @@ class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
delegate.addSortedNumericField(field, docToValueCount, values);
BKDTreeWriter writer = new BKDTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
BKDTreeWriter writer = new BKDTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> valueIt = values.iterator();
Iterator<Number> valueCountIt = docToValueCount.iterator();
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {

View File

@ -99,7 +99,7 @@ public class BKDTreeDocValuesFormat extends DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
return new BKDTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
return new BKDTreeDocValuesConsumer(state.directory, state.segmentInfo.name, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
}
@Override

View File

@ -18,14 +18,13 @@ package org.apache.lucene.bkdtree;
*/
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -33,8 +32,8 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
@ -88,22 +87,27 @@ class BKDTreeWriter {
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private OfflineSorter.ByteSequencesWriter writer;
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapLatLonWriter heapWriter;
private Path tempInput;
private IndexOutput tempInput;
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
private long pointCount;
public BKDTreeWriter() throws IOException {
this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public BKDTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
@ -143,8 +147,8 @@ class BKDTreeWriter {
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<pointCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(heapWriter.latEncs[i]);
@ -152,7 +156,7 @@ class BKDTreeWriter {
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
@ -165,7 +169,7 @@ class BKDTreeWriter {
assert lonEnc < Integer.MAX_VALUE;
if (pointCount >= maxPointsSortInHeap) {
if (writer == null) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
@ -173,7 +177,7 @@ class BKDTreeWriter {
scratchBytesOutput.writeInt(lonEnc);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(pointCount);
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(latEnc, lonEnc, pointCount, docID);
@ -184,7 +188,7 @@ class BKDTreeWriter {
/** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private LatLonWriter convertToFixedWidth(Path in) throws IOException {
private LatLonWriter convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
@ -194,7 +198,7 @@ class BKDTreeWriter {
LatLonWriter sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(in);
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(pointCount);
for (long i=0;i<pointCount;i++) {
boolean result = reader.read(scratch);
@ -230,10 +234,10 @@ class BKDTreeWriter {
private LatLonWriter sort(boolean lon) throws IOException {
if (heapWriter != null) {
// All buffered points are still in heap
assert pointCount < Integer.MAX_VALUE;
// All buffered points are still in heap
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
@ -329,20 +333,19 @@ class BKDTreeWriter {
}
};
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
boolean success = false;
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
OfflineSorter latSorter = new OfflineSorter(cmp);
latSorter.sort(tempInput, sorted);
LatLonWriter writer = convertToFixedWidth(sorted);
LatLonWriter writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
IOUtils.rm(sorted);
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(sorted);
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
@ -352,8 +355,9 @@ class BKDTreeWriter {
public long finish(IndexOutput out) throws IOException {
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
if (writer != null) {
writer.close();
if (offlineWriter != null) {
// This also closes the temp file output:
offlineWriter.close();
}
LongBitSet bitSet = new LongBitSet(pointCount);
@ -410,7 +414,9 @@ class BKDTreeWriter {
if (success) {
latSortedWriter.destroy();
lonSortedWriter.destroy();
IOUtils.rm(tempInput);
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
latSortedWriter.destroy();
@ -422,7 +428,9 @@ class BKDTreeWriter {
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
IOUtils.deleteFilesIgnoringExceptions(tempInput);
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
@ -813,7 +821,7 @@ class BKDTreeWriter {
if (count < maxPointsSortInHeap) {
return new HeapLatLonWriter((int) count);
} else {
return new OfflineLatLonWriter(count);
return new OfflineLatLonWriter(tempDir, tempFileNamePrefix, count);
}
}

View File

@ -17,34 +17,23 @@ package org.apache.lucene.bkdtree;
* limitations under the License.
*/
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineLatLonReader implements LatLonReader {
final InputStreamDataInput in;
final IndexInput in;
long countLeft;
private int latEnc;
private int lonEnc;
private long ord;
private int docID;
OfflineLatLonReader(Path tempFile, long start, long count) throws IOException {
InputStream fis = Files.newInputStream(tempFile);
long seekFP = start * BKDTreeWriter.BYTES_PER_DOC;
long skipped = 0;
while (skipped < seekFP) {
long inc = fis.skip(seekFP - skipped);
skipped += inc;
if (inc == 0) {
throw new RuntimeException("skip returned 0");
}
}
in = new InputStreamDataInput(new BufferedInputStream(fis));
OfflineLatLonReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * BKDTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}

View File

@ -17,29 +17,26 @@ package org.apache.lucene.bkdtree;
* limitations under the License.
*/
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineLatLonWriter implements LatLonWriter {
final Path tempFile;
final Directory tempDir;
final byte[] scratchBytes = new byte[BKDTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final OutputStreamDataOutput out;
final IndexOutput out;
final long count;
private long countWritten;
private boolean closed;
public OfflineLatLonWriter(long count) throws IOException {
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
public OfflineLatLonWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
this.count = count;
}
@ -55,7 +52,7 @@ final class OfflineLatLonWriter implements LatLonWriter {
@Override
public LatLonReader getReader(long start) throws IOException {
assert closed;
return new OfflineLatLonReader(tempFile, start, count-start);
return new OfflineLatLonReader(tempDir, out.getName(), start, count-start);
}
@Override
@ -69,12 +66,12 @@ final class OfflineLatLonWriter implements LatLonWriter {
@Override
public void destroy() throws IOException {
IOUtils.rm(tempFile);
tempDir.deleteFile(out.getName());
}
@Override
public String toString() {
return "OfflineLatLonWriter(count=" + count + " tempFile=" + tempFile + ")";
return "OfflineLatLonWriter(count=" + count + " tempFileName=" + out.getName() + ")";
}
}

View File

@ -17,33 +17,22 @@ package org.apache.lucene.rangetree;
* limitations under the License.
*/
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineSliceReader implements SliceReader {
final InputStreamDataInput in;
long countLeft;
final IndexInput in;
private long countLeft;
private long value;
private long ord;
private int docID;
OfflineSliceReader(Path tempFile, long start, long count) throws IOException {
InputStream fis = Files.newInputStream(tempFile);
long seekFP = start * RangeTreeWriter.BYTES_PER_DOC;
long skipped = 0;
while (skipped < seekFP) {
long inc = fis.skip(seekFP - skipped);
skipped += inc;
if (inc == 0) {
throw new RuntimeException("skip returned 0");
}
}
in = new InputStreamDataInput(new BufferedInputStream(fis));
OfflineSliceReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * RangeTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}

View File

@ -17,50 +17,47 @@ package org.apache.lucene.rangetree;
* limitations under the License.
*/
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineSliceWriter implements SliceWriter {
final Path tempFile;
final Directory tempDir;
final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final OutputStreamDataOutput out;
final IndexOutput tempFile;
final long count;
private boolean closed;
private long countWritten;
public OfflineSliceWriter(long count) throws IOException {
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
public OfflineSliceWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
tempFile = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
this.count = count;
}
@Override
public void append(long value, long ord, int docID) throws IOException {
out.writeLong(value);
out.writeLong(ord);
out.writeInt(docID);
tempFile.writeLong(value);
tempFile.writeLong(ord);
tempFile.writeInt(docID);
countWritten++;
}
@Override
public SliceReader getReader(long start) throws IOException {
assert closed;
return new OfflineSliceReader(tempFile, start, count-start);
return new OfflineSliceReader(tempDir, tempFile.getName(), start, count-start);
}
@Override
public void close() throws IOException {
closed = true;
out.close();
tempFile.close();
if (count != countWritten) {
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
}
@ -68,12 +65,12 @@ final class OfflineSliceWriter implements SliceWriter {
@Override
public void destroy() throws IOException {
IOUtils.rm(tempFile);
tempDir.deleteFile(tempFile.getName());
}
@Override
public String toString() {
return "OfflineSliceWriter(count=" + count + " tempFile=" + tempFile + ")";
return "OfflineSliceWriter(count=" + count + " tempFileName=" + tempFile.getName() + ")";
}
}

View File

@ -91,7 +91,7 @@ class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable
@Override
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
delegate.addSortedNumericField(field, docToValueCount, values);
RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> valueIt = values.iterator();
Iterator<Number> valueCountIt = docToValueCount.iterator();
//System.out.println("\nSNF: field=" + field.name);
@ -127,7 +127,7 @@ class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
delegate.addSortedSetField(field, values, docToOrdCount, ords);
RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<Number> docToOrdCountIt = docToOrdCount.iterator();
Iterator<Number> ordsIt = ords.iterator();
//System.out.println("\nSSF: field=" + field.name);

View File

@ -18,22 +18,21 @@ package org.apache.lucene.rangetree;
*/
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
@ -77,10 +76,13 @@ class RangeTreeWriter {
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private OfflineSorter.ByteSequencesWriter writer;
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapSliceWriter heapWriter;
private Path tempInput;
private IndexOutput tempInput;
private final int maxValuesInLeafNode;
private final int maxValuesSortInHeap;
@ -88,13 +90,15 @@ class RangeTreeWriter {
private long globalMinValue = Long.MAX_VALUE;
private long globalMaxValue = Long.MIN_VALUE;
public RangeTreeWriter() throws IOException {
this(DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
}
// TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public RangeTreeWriter(int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
verifyParams(maxValuesInLeafNode, maxValuesSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxValuesInLeafNode = maxValuesInLeafNode;
this.maxValuesSortInHeap = maxValuesSortInHeap;
@ -121,15 +125,15 @@ class RangeTreeWriter {
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<valueCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeLong(heapWriter.values[i]);
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
@ -137,14 +141,14 @@ class RangeTreeWriter {
void add(long value, int docID) throws IOException {
if (valueCount >= maxValuesSortInHeap) {
if (writer == null) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeLong(value);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(valueCount);
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(value, valueCount, docID);
@ -157,7 +161,7 @@ class RangeTreeWriter {
/** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private SliceWriter convertToFixedWidth(Path in) throws IOException {
private SliceWriter convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
@ -167,7 +171,7 @@ class RangeTreeWriter {
SliceWriter sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(in);
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(valueCount);
for (long i=0;i<valueCount;i++) {
boolean result = reader.read(scratch);
@ -280,19 +284,18 @@ class RangeTreeWriter {
}
};
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
OfflineSorter sorter = new OfflineSorter(cmp);
sorter.sort(tempInput, sorted);
SliceWriter writer = convertToFixedWidth(sorted);
SliceWriter writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
IOUtils.rm(sorted);
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(sorted);
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
@ -301,8 +304,8 @@ class RangeTreeWriter {
/** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
public long finish(IndexOutput out) throws IOException {
if (writer != null) {
writer.close();
if (offlineWriter != null) {
offlineWriter.close();
}
if (valueCount == 0) {
@ -357,14 +360,18 @@ class RangeTreeWriter {
} finally {
if (success) {
sortedWriter.destroy();
IOUtils.rm(tempInput);
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
sortedWriter.destroy();
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
IOUtils.deleteFilesIgnoringExceptions(tempInput);
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
@ -567,7 +574,7 @@ class RangeTreeWriter {
if (count < maxValuesSortInHeap) {
return new HeapSliceWriter((int) count);
} else {
return new OfflineSliceWriter(count);
return new OfflineSliceWriter(tempDir, tempFileNamePrefix, count);
}
}
}

View File

@ -17,6 +17,15 @@ package org.apache.lucene.bkdtree;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
@ -37,24 +46,16 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Nightly;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
public class TestBKDTree extends LuceneTestCase {
private static boolean smallBBox;
@ -163,7 +164,7 @@ public class TestBKDTree extends LuceneTestCase {
// Every doc has 2 points:
double[] lats = new double[2*numPoints];
double[] lons = new double[2*numPoints];
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
iwc.setMergePolicy(newLogMergePolicy());
@ -378,7 +379,7 @@ public class TestBKDTree extends LuceneTestCase {
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir("TestBKDTree"));
} else {
dir = newDirectory();
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
@ -608,7 +609,7 @@ public class TestBKDTree extends LuceneTestCase {
}
public void testAccountableHasDelegate() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
iwc.setCodec(codec);
@ -632,4 +633,12 @@ public class TestBKDTree extends LuceneTestCase {
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
return new BKDTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
}
private static Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -17,6 +17,14 @@ package org.apache.lucene.rangetree;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
@ -39,6 +47,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.Bits;
@ -49,14 +58,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.BeforeClass;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
public class TestRangeTree extends LuceneTestCase {
// Controls what range of values we randomly generate, so we sometimes test narrow ranges:
@ -116,7 +117,7 @@ public class TestRangeTree extends LuceneTestCase {
int numValues = atLeast(10000);
// Every doc has 2 values:
long[] values = new long[2*numValues];
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
@ -201,7 +202,7 @@ public class TestRangeTree extends LuceneTestCase {
int numValues = atLeast(10000);
// Every doc has 2 values:
long[] values = new long[2*numValues];
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// We rely on docID order:
@ -370,7 +371,7 @@ public class TestRangeTree extends LuceneTestCase {
if (values.length > 100000) {
dir = newFSDirectory(createTempDir("TestRangeTree"));
} else {
dir = newDirectory();
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
@ -534,7 +535,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testAccountableHasDelegate() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -554,7 +555,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testMinMaxLong() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -580,7 +581,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testBasicSortedSet() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -613,7 +614,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testLongMinMaxNumeric() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -641,7 +642,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testLongMinMaxSortedSet() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -669,7 +670,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testSortedSetNoOrdsMatch() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -693,7 +694,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testNumericNoValuesMatch() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -715,7 +716,7 @@ public class TestRangeTree extends LuceneTestCase {
}
public void testNoDocs() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
iwc.setCodec(codec);
@ -766,4 +767,12 @@ public class TestRangeTree extends LuceneTestCase {
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
return new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
}
private static Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -18,14 +18,13 @@ package org.apache.lucene.bkdtree3d;
*/
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -33,8 +32,8 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.RamUsageEstimator;
// TODO
@ -83,10 +82,13 @@ class BKD3DTreeWriter {
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
private OfflineSorter.ByteSequencesWriter writer;
private final Directory tempDir;
private final String tempFileNamePrefix;
private OfflineSorter.ByteSequencesWriter offlineWriter;
private GrowingHeapWriter heapWriter;
private Path tempInput;
private IndexOutput tempInput;
private final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
@ -94,13 +96,15 @@ class BKD3DTreeWriter {
private final int[] scratchDocIDs;
public BKD3DTreeWriter() throws IOException {
this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
}
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
public BKD3DTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
scratchDocIDs = new int[maxPointsInLeafNode];
@ -128,8 +132,8 @@ class BKD3DTreeWriter {
private void switchToOffline() throws IOException {
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
for(int i=0;i<pointCount;i++) {
scratchBytesOutput.reset(scratchBytes);
scratchBytesOutput.writeInt(heapWriter.xs[i]);
@ -138,7 +142,7 @@ class BKD3DTreeWriter {
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
scratchBytesOutput.writeVLong(i);
// TODO: can/should OfflineSorter optimize the fixed-width case?
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
}
heapWriter = null;
@ -147,7 +151,7 @@ class BKD3DTreeWriter {
public void add(int x, int y, int z, int docID) throws IOException {
if (pointCount >= maxPointsSortInHeap) {
if (writer == null) {
if (offlineWriter == null) {
switchToOffline();
}
scratchBytesOutput.reset(scratchBytes);
@ -156,7 +160,7 @@ class BKD3DTreeWriter {
scratchBytesOutput.writeInt(z);
scratchBytesOutput.writeVInt(docID);
scratchBytesOutput.writeVLong(pointCount);
writer.write(scratchBytes, 0, scratchBytes.length);
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
} else {
// Not too many points added yet, continue using heap:
heapWriter.append(x, y, z, pointCount, docID);
@ -167,7 +171,7 @@ class BKD3DTreeWriter {
/** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
* as we recurse in {@link #build}. */
private Writer convertToFixedWidth(Path in) throws IOException {
private Writer convertToFixedWidth(String in) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
scratch.grow(BYTES_PER_DOC);
BytesRef bytes = scratch.get();
@ -177,7 +181,7 @@ class BKD3DTreeWriter {
Writer sortedWriter = null;
boolean success = false;
try {
reader = new OfflineSorter.ByteSequencesReader(in);
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
sortedWriter = getWriter(pointCount);
for (long i=0;i<pointCount;i++) {
boolean result = reader.read(scratch);
@ -328,19 +332,18 @@ class BKD3DTreeWriter {
}
};
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
boolean success = false;
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
String sortedFileName = sorter.sort(tempInput.getName());
try {
OfflineSorter sorter = new OfflineSorter(cmp);
sorter.sort(tempInput, sorted);
Writer writer = convertToFixedWidth(sorted);
Writer writer = convertToFixedWidth(sortedFileName);
success = true;
return writer;
} finally {
if (success) {
IOUtils.rm(sorted);
tempDir.deleteFile(sortedFileName);
} else {
IOUtils.deleteFilesIgnoringExceptions(sorted);
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
}
}
}
@ -350,8 +353,8 @@ class BKD3DTreeWriter {
public long finish(IndexOutput out) throws IOException {
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter + " maxPointsInLeafNode=" + maxPointsInLeafNode);
if (writer != null) {
writer.close();
if (offlineWriter != null) {
offlineWriter.close();
}
LongBitSet bitSet = new LongBitSet(pointCount);
@ -413,7 +416,9 @@ class BKD3DTreeWriter {
xSortedWriter.destroy();
ySortedWriter.destroy();
zSortedWriter.destroy();
IOUtils.rm(tempInput);
if (tempInput != null) {
tempDir.deleteFile(tempInput.getName());
}
} else {
try {
xSortedWriter.destroy();
@ -430,7 +435,9 @@ class BKD3DTreeWriter {
} catch (Throwable t) {
// Suppress to keep throwing original exc
}
IOUtils.deleteFilesIgnoringExceptions(tempInput);
if (tempInput != null) {
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
}
}
}
@ -911,7 +918,7 @@ class BKD3DTreeWriter {
if (count < maxPointsSortInHeap) {
return new HeapWriter((int) count);
} else {
return new OfflineWriter(count);
return new OfflineWriter(tempDir, tempFileNamePrefix, count);
}
}
}

View File

@ -17,22 +17,23 @@ package org.apache.lucene.bkdtree3d;
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
final DocValuesConsumer delegate;
final int maxPointsInLeafNode;
@ -40,9 +41,14 @@ class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
final IndexOutput out;
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
final SegmentWriteState state;
final Directory tempDir;
final String tempFileNamePrefix;
public Geo3DDocValuesConsumer(PlanetModel planetModel, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
public Geo3DDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, PlanetModel planetModel, DocValuesConsumer delegate,
SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.delegate = delegate;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.maxPointsSortInHeap = maxPointsSortInHeap;
@ -106,7 +112,7 @@ class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
@Override
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
delegate.addBinaryField(field, values);
BKD3DTreeWriter writer = new BKD3DTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
Iterator<BytesRef> valuesIt = values.iterator();
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
assert valuesIt.hasNext();

View File

@ -22,7 +22,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
import org.apache.lucene.geo3d.PlanetModel;
import org.apache.lucene.geo3d.Vector;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -106,7 +105,7 @@ public class Geo3DDocValuesFormat extends DocValuesFormat {
@Override
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
return new Geo3DDocValuesConsumer(planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
return new Geo3DDocValuesConsumer(state.directory, state.segmentInfo.name, planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
}
@Override

View File

@ -17,16 +17,14 @@ package org.apache.lucene.bkdtree3d;
* limitations under the License.
*/
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
final class OfflineReader implements Reader {
final InputStreamDataInput in;
final IndexInput in;
long countLeft;
private int x;
private int y;
@ -34,18 +32,9 @@ final class OfflineReader implements Reader {
private long ord;
private int docID;
OfflineReader(Path tempFile, long start, long count) throws IOException {
InputStream fis = Files.newInputStream(tempFile);
long seekFP = start * BKD3DTreeWriter.BYTES_PER_DOC;
long skipped = 0;
while (skipped < seekFP) {
long inc = fis.skip(seekFP - skipped);
skipped += inc;
if (inc == 0) {
throw new RuntimeException("skip returned 0");
}
}
in = new InputStreamDataInput(new BufferedInputStream(fis));
OfflineReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
in = tempDir.openInput(tempFileName, IOContext.READONCE);
in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC);
this.countLeft = count;
}

View File

@ -17,29 +17,26 @@ package org.apache.lucene.bkdtree3d;
* limitations under the License.
*/
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
final class OfflineWriter implements Writer {
final Path tempFile;
final Directory tempDir;
final IndexOutput out;
final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
final OutputStreamDataOutput out;
final long count;
private long countWritten;
private boolean closed;
public OfflineWriter(long count) throws IOException {
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
public OfflineWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
this.tempDir = tempDir;
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
this.count = count;
}
@ -56,7 +53,7 @@ final class OfflineWriter implements Writer {
@Override
public Reader getReader(long start) throws IOException {
assert closed;
return new OfflineReader(tempFile, start, count-start);
return new OfflineReader(tempDir, out.getName(), start, count-start);
}
@Override
@ -70,11 +67,11 @@ final class OfflineWriter implements Writer {
@Override
public void destroy() throws IOException {
IOUtils.rm(tempFile);
tempDir.deleteFile(out.getName());
}
@Override
public String toString() {
return "OfflineWriter(count=" + count + " tempFile=" + tempFile + ")";
return "OfflineWriter(count=" + count + " tempFileName=" + out.getName() + ")";
}
}

View File

@ -17,6 +17,16 @@ package org.apache.lucene.bkdtree3d;
* limitations under the License.
*/
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
@ -50,6 +60,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@ -58,16 +69,6 @@ import org.junit.BeforeClass;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueCenter;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMax;
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMin;
@ -87,7 +88,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
public void testBasic() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
IndexWriterConfig iwc = newIndexWriterConfig();
@ -108,7 +109,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
public void testPlanetModelChanged() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
IndexWriterConfig iwc = newIndexWriterConfig();
@ -137,10 +138,10 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
public void testBKDBasic() throws Exception {
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
BKD3DTreeWriter w = new BKD3DTreeWriter();
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d");
w.add(0, 0, 0, 0);
w.add(1, 1, 1, 1);
@ -245,7 +246,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
public void testBKDRandom() throws Exception {
List<Point> points = new ArrayList<>();
int numPoints = atLeast(10000);
Directory dir = newDirectory();
Directory dir = getDirectory();
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
@ -254,7 +255,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
PlanetModel planetModel = getPlanetModel();
final double planetMax = planetModel.getMaximumMagnitude();
BKD3DTreeWriter w = new BKD3DTreeWriter(maxPointsInLeaf, maxPointsSortInHeap);
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d", maxPointsInLeaf, maxPointsSortInHeap);
for(int docID=0;docID<numPoints;docID++) {
Point point;
if (docID > 0 && random().nextInt(30) == 17) {
@ -924,7 +925,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir("TestBKDTree"));
} else {
dir = newDirectory();
dir = getDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
@ -1059,4 +1060,12 @@ public class TestGeo3DPointField extends LuceneTestCase {
}
IOUtils.close(r, dir);
}
private static Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -18,14 +18,15 @@ package org.apache.lucene.search.suggest;
*/
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -41,12 +42,14 @@ import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
public class SortedInputIterator implements InputIterator {
private final InputIterator source;
private Path tempInput;
private Path tempSorted;
private IndexOutput tempInput;
private String tempSortedFileName;
private final ByteSequencesReader reader;
private final Comparator<BytesRef> comparator;
private final boolean hasPayloads;
private final boolean hasContexts;
private final Directory tempDir;
private final String tempFileNamePrefix;
private boolean done = false;
private long weight;
@ -58,19 +61,21 @@ public class SortedInputIterator implements InputIterator {
* Creates a new sorted wrapper, using {@link
* BytesRef#getUTF8SortedAsUnicodeComparator} for
* sorting. */
public SortedInputIterator(InputIterator source) throws IOException {
this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
public SortedInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source) throws IOException {
this(tempDir, tempFileNamePrefix, source, BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
* Creates a new sorted wrapper, sorting by BytesRef
* (ascending) then cost (ascending).
*/
public SortedInputIterator(InputIterator source, Comparator<BytesRef> comparator) throws IOException {
public SortedInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source, Comparator<BytesRef> comparator) throws IOException {
this.hasPayloads = source.hasPayloads();
this.hasContexts = source.hasContexts();
this.source = source;
this.comparator = comparator;
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
this.reader = sort();
}
@ -83,7 +88,7 @@ public class SortedInputIterator implements InputIterator {
try {
ByteArrayDataInput input = new ByteArrayDataInput();
if (reader.read(scratch)) {
final BytesRef bytes = scratch.get();
final BytesRef bytes = scratch.get();
weight = decode(bytes, input);
if (hasPayloads) {
payload = decodePayload(bytes, input);
@ -168,10 +173,9 @@ public class SortedInputIterator implements InputIterator {
};
private ByteSequencesReader sort() throws IOException {
String prefix = getClass().getSimpleName();
Path directory = OfflineSorter.getDefaultTempDir();
tempInput = Files.createTempFile(directory, prefix, ".input");
tempSorted = Files.createTempFile(directory, prefix, ".sorted");
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, tieBreakByCostComparator);
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
final OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
boolean success = false;
@ -184,8 +188,8 @@ public class SortedInputIterator implements InputIterator {
encode(writer, output, buffer, spare, source.payload(), source.contexts(), source.weight());
}
writer.close();
new OfflineSorter(tieBreakByCostComparator).sort(tempInput, tempSorted);
ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(tempSorted);
tempSortedFileName = sorter.sort(tempInput.getName());
ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
success = true;
return reader;
@ -203,16 +207,12 @@ public class SortedInputIterator implements InputIterator {
}
private void close() throws IOException {
boolean success = false;
try {
IOUtils.close(reader);
success = true;
} finally {
if (success) {
IOUtils.deleteFilesIfExist(tempInput, tempSorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
}
IOUtils.deleteFilesIgnoringExceptions(tempDir,
tempInput == null ? null : tempInput.getName(),
tempSortedFileName);
}
}

View File

@ -17,11 +17,7 @@ package org.apache.lucene.search.suggest.analyzing;
* limitations under the License.
*/
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -39,6 +35,9 @@ import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
@ -64,6 +63,8 @@ import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.fst.Util.TopResults;
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
/**
* Suggester that first analyzes the surface form, adds the
* analyzed form to a weighted FST, and then does the same
@ -150,14 +151,14 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
private final boolean preserveSep;
/** Include this flag in the options parameter to {@link
* #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to always
* #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)} to always
* return the exact match first, regardless of score. This
* has no performance impact but could result in
* low-quality suggestions. */
public static final int EXACT_FIRST = 1;
/** Include this flag in the options parameter to {@link
* #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to preserve
* #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)} to preserve
* token separators when matching. */
public static final int PRESERVE_SEP = 2;
@ -179,6 +180,9 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
* SynonymFilter). */
private final int maxGraphExpansions;
private final Directory tempDir;
private final String tempFileNamePrefix;
/** Highest number of analyzed paths we saw for any single
* input surface form. For analyzers that never create
* graphs this will always be 1. */
@ -195,21 +199,21 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
private long count = 0;
/**
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
* Calls {@link #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
* PRESERVE_SEP, 256, -1, true)}
*/
public AnalyzingSuggester(Analyzer analyzer) {
this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer analyzer) {
this(tempDir, tempFileNamePrefix, analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
}
/**
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
* Calls {@link #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)
* AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST |
* PRESERVE_SEP, 256, -1, true)}
*/
public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
this(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
}
/**
@ -230,7 +234,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
* @param preservePositionIncrements Whether position holes
* should appear in the automata
*/
public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
boolean preservePositionIncrements) {
this.indexAnalyzer = indexAnalyzer;
this.queryAnalyzer = queryAnalyzer;
@ -254,6 +258,8 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
}
this.maxGraphExpansions = maxGraphExpansions;
this.preservePositionIncrements = preservePositionIncrements;
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
/** Returns byte size of the underlying FST. */
@ -396,20 +402,21 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
if (iterator.hasContexts()) {
throw new IllegalArgumentException("this suggester doesn't support contexts");
}
String prefix = getClass().getSimpleName();
Path directory = OfflineSorter.getDefaultTempDir();
Path tempInput = Files.createTempFile(directory, prefix, ".input");
Path tempSorted = Files.createTempFile(directory, prefix, ".sorted");
hasPayloads = iterator.hasPayloads();
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, new AnalyzingComparator(hasPayloads));
IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
OfflineSorter.ByteSequencesReader reader = null;
BytesRefBuilder scratch = new BytesRefBuilder();
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
boolean success = false;
String tempSortedFileName = null;
count = 0;
byte buffer[] = new byte[8];
try {
@ -477,12 +484,12 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
writer.close();
// Sort all input/output pairs (required by FST.Builder):
new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
tempSortedFileName = sorter.sort(tempInput.getName());
// Free disk space:
Files.delete(tempInput);
tempDir.deleteFile(tempInput.getName());
reader = new OfflineSorter.ByteSequencesReader(tempSorted);
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
@ -570,16 +577,9 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
fst = builder.finish();
//Util.dotToFile(fst, "/tmp/suggest.dot");
success = true;
} finally {
IOUtils.closeWhileHandlingException(reader, writer);
if (success) {
IOUtils.deleteFilesIfExist(tempInput, tempSorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
}
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
}
}

View File

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
@ -113,8 +114,8 @@ public final class FuzzySuggester extends AnalyzingSuggester {
*
* @param analyzer the analyzer used for this suggester
*/
public FuzzySuggester(Analyzer analyzer) {
this(analyzer, analyzer);
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer analyzer) {
this(tempDir, tempFileNamePrefix, analyzer, analyzer);
}
/**
@ -125,8 +126,8 @@ public final class FuzzySuggester extends AnalyzingSuggester {
* @param queryAnalyzer
* Analyzer that will be used for analyzing query text during lookup
*/
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
this(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE);
}
@ -154,11 +155,11 @@ public final class FuzzySuggester extends AnalyzingSuggester {
* @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
* @param unicodeAware operate Unicode code points instead of bytes.
*/
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
boolean preservePositionIncrements, int maxEdits, boolean transpositions,
int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
super(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}

View File

@ -19,10 +19,10 @@ package org.apache.lucene.search.suggest.fst;
import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
@ -34,48 +34,49 @@ import org.apache.lucene.util.OfflineSorter;
* @lucene.internal
*/
public class ExternalRefSorter implements BytesRefSorter, Closeable {
private final OfflineSorter sort;
private final OfflineSorter sorter;
private OfflineSorter.ByteSequencesWriter writer;
private Path input;
private Path sorted;
private IndexOutput input;
private String sortedFileName;
/**
* Will buffer all sequences to a temporary file and then sort (all on-disk).
*/
public ExternalRefSorter(OfflineSorter sort) throws IOException {
this.sort = sort;
this.input = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "RefSorter-", ".raw");
this.writer = new OfflineSorter.ByteSequencesWriter(input);
public ExternalRefSorter(OfflineSorter sorter) throws IOException {
this.sorter = sorter;
this.input = sorter.getDirectory().createTempOutput(sorter.getTempFileNamePrefix(), "RefSorterRaw", IOContext.DEFAULT);
this.writer = new OfflineSorter.ByteSequencesWriter(this.input);
}
@Override
public void add(BytesRef utf8) throws IOException {
if (writer == null) throw new IllegalStateException();
if (writer == null) {
throw new IllegalStateException();
}
writer.write(utf8);
}
@Override
public BytesRefIterator iterator() throws IOException {
if (sorted == null) {
if (sortedFileName == null) {
closeWriter();
sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "RefSorter-", ".sorted");
boolean success = false;
try {
sort.sort(input, sorted);
sortedFileName = sorter.sort(input.getName());
success = true;
} finally {
if (success) {
Files.delete(input);
sorter.getDirectory().deleteFile(input.getName());
} else {
IOUtils.deleteFilesIgnoringExceptions(input);
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(), input.getName());
}
}
input = null;
}
return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorted));
return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorter.getDirectory().openInput(sortedFileName, IOContext.READONCE)));
}
private void closeWriter() throws IOException {
@ -90,16 +91,12 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
*/
@Override
public void close() throws IOException {
boolean success = false;
try {
closeWriter();
success = true;
} finally {
if (success) {
IOUtils.deleteFilesIfExist(input, sorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(input, sorted);
}
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(),
input == null ? null : input.getName(),
sortedFileName);
}
}
@ -142,6 +139,6 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
@Override
public Comparator<BytesRef> getComparator() {
return sort.getComparator();
return sorter.getComparator();
}
}

View File

@ -18,8 +18,6 @@ package org.apache.lucene.search.suggest.fst;
*/
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -34,6 +32,9 @@ import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
@ -42,7 +43,6 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.SortInfo;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
@ -76,7 +76,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
* An invalid bucket count if we're creating an object
* of this class from an existing FST.
*
* @see #FSTCompletionLookup(FSTCompletion, boolean)
* @see #FSTCompletionLookup(Directory, String, FSTCompletion, boolean)
*/
private static int INVALID_BUCKETS_COUNT = -1;
@ -89,6 +89,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
*/
private final static int sharedTailLength = 5;
private final Directory tempDir;
private final String tempFileNamePrefix;
private int buckets;
private boolean exactMatchFirst;
@ -105,14 +108,21 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
/** Number of entries the lookup was built with */
private long count = 0;
/**
* This constructor should only be used to read a previously saved suggester.
*/
public FSTCompletionLookup() {
this(null, null);
}
/**
* This constructor prepares for creating a suggested FST using the
* {@link #build(InputIterator)} method. The number of weight
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
* exact matches are promoted to the top of the suggestions list.
*/
public FSTCompletionLookup() {
this(FSTCompletion.DEFAULT_BUCKETS, true);
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix) {
this(tempDir, tempFileNamePrefix, FSTCompletion.DEFAULT_BUCKETS, true);
}
/**
@ -128,9 +138,11 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
* suggestions list. Otherwise they appear in the order of
* discretized weight and alphabetical within the bucket.
*/
public FSTCompletionLookup(int buckets, boolean exactMatchFirst) {
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, int buckets, boolean exactMatchFirst) {
this.buckets = buckets;
this.exactMatchFirst = exactMatchFirst;
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
/**
@ -143,8 +155,8 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
* suggestions list. Otherwise they appear in the order of
* discretized weight and alphabetical within the bucket.
*/
public FSTCompletionLookup(FSTCompletion completion, boolean exactMatchFirst) {
this(INVALID_BUCKETS_COUNT, exactMatchFirst);
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, FSTCompletion completion, boolean exactMatchFirst) {
this(tempDir, tempFileNamePrefix, INVALID_BUCKETS_COUNT, exactMatchFirst);
this.normalCompletion = new FSTCompletion(
completion.getFST(), false, exactMatchFirst);
this.higherWeightsCompletion = new FSTCompletion(
@ -159,23 +171,23 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
if (iterator.hasContexts()) {
throw new IllegalArgumentException("this suggester doesn't support contexts");
}
Path tempInput = Files.createTempFile(
OfflineSorter.getDefaultTempDir(), FSTCompletionLookup.class.getSimpleName(), ".input");
Path tempSorted = Files.createTempFile(
OfflineSorter.getDefaultTempDir(), FSTCompletionLookup.class.getSimpleName(), ".sorted");
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
String tempSortedFileName = null;
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
OfflineSorter.ByteSequencesReader reader = null;
ExternalRefSorter sorter = null;
// Push floats up front before sequences to sort them. For now, assume they are non-negative.
// If negative floats are allowed some trickery needs to be done to find their byte order.
boolean success = false;
count = 0;
try {
byte [] buffer = new byte [0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
BytesRef spare;
int inputLineCount = 0;
while ((spare = iterator.next()) != null) {
if (spare.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
@ -185,18 +197,19 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
output.writeInt(encodeWeight(iterator.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
inputLineCount++;
}
writer.close();
// We don't know the distribution of scores and we need to bucket them, so we'll sort
// and divide into equal buckets.
SortInfo info = new OfflineSorter().sort(tempInput, tempSorted);
Files.delete(tempInput);
FSTCompletionBuilder builder = new FSTCompletionBuilder(
buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);
tempSortedFileName = sorter.sort(tempInput.getName());
tempDir.deleteFile(tempInput.getName());
final int inputLines = info.lines;
reader = new OfflineSorter.ByteSequencesReader(tempSorted);
FSTCompletionBuilder builder = new FSTCompletionBuilder(
buckets, externalSorter, sharedTailLength);
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
long line = 0;
int previousBucket = 0;
int previousScore = 0;
@ -211,7 +224,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
if (line > 0 && currentScore == previousScore) {
bucket = previousBucket;
} else {
bucket = (int) (line * buckets / inputLines);
bucket = (int) (line * buckets / inputLineCount);
}
previousScore = currentScore;
previousBucket = bucket;
@ -231,15 +244,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
this.normalCompletion = new FSTCompletion(
higherWeightsCompletion.getFST(), false, exactMatchFirst);
success = true;
} finally {
IOUtils.closeWhileHandlingException(reader, writer, sorter);
if (success) {
Files.delete(tempSorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
}
IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
}
}
@ -285,8 +292,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
@Override
public synchronized boolean store(DataOutput output) throws IOException {
output.writeVLong(count);
if (this.normalCompletion == null || normalCompletion.getFST() == null)
if (this.normalCompletion == null || normalCompletion.getFST() == null) {
return false;
}
normalCompletion.getFST().save(output);
return true;
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
@ -78,11 +79,14 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
/** Number of entries the lookup was built with */
private long count = 0;
private final Directory tempDir;
private final String tempFileNamePrefix;
/**
* Calls {@link #WFSTCompletionLookup(boolean) WFSTCompletionLookup(true)}
* Calls {@link #WFSTCompletionLookup(Directory,String,boolean) WFSTCompletionLookup(null,null,true)}
*/
public WFSTCompletionLookup() {
this(true);
public WFSTCompletionLookup(Directory tempDir, String tempFileNamePrefix) {
this(tempDir, tempFileNamePrefix, true);
}
/**
@ -93,8 +97,10 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
* of score. This has no performance impact, but could result
* in low-quality suggestions.
*/
public WFSTCompletionLookup(boolean exactFirst) {
public WFSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, boolean exactFirst) {
this.exactFirst = exactFirst;
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
@Override
@ -107,7 +113,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
}
count = 0;
BytesRef scratch = new BytesRef();
InputIterator iter = new WFSTInputIterator(iterator);
InputIterator iter = new WFSTInputIterator(tempDir, tempFileNamePrefix, iterator);
IntsRefBuilder scratchInts = new IntsRefBuilder();
BytesRefBuilder previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@ -264,8 +270,8 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
private final class WFSTInputIterator extends SortedInputIterator {
WFSTInputIterator(InputIterator source) throws IOException {
super(source);
WFSTInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source) throws IOException {
super(tempDir, tempFileNamePrefix, source);
assert source.hasPayloads() == false;
}

View File

@ -27,11 +27,10 @@ import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.SortedInputIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
/**
* Suggest implementation based on a
@ -45,12 +44,26 @@ public class TSTLookup extends Lookup {
/** Number of entries the lookup was built with */
private long count = 0;
private final Directory tempDir;
private final String tempFileNamePrefix;
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
* @see #build(InputIterator)
*/
public TSTLookup() {}
public TSTLookup() {
this(null, null);
}
/**
* Creates a new TSTLookup, for building.
* @see #build(InputIterator)
*/
public TSTLookup(Directory tempDir, String tempFileNamePrefix) {
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
@Override
public void build(InputIterator iterator) throws IOException {
@ -63,7 +76,7 @@ public class TSTLookup extends Lookup {
root = new TernaryTreeNode();
// make sure it's sorted and the comparator uses UTF16 sort order
iterator = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUTF16Comparator());
iterator = new SortedInputIterator(tempDir, tempFileNamePrefix, iterator, BytesRef.getUTF8SortedAsUTF16Comparator());
count = 0;
ArrayList<String> tokens = new ArrayList<>();
ArrayList<Number> vals = new ArrayList<>();

View File

@ -18,13 +18,15 @@ package org.apache.lucene.search.suggest;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import java.util.List;
import java.util.Random;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -58,11 +60,26 @@ public class PersistenceTest extends LuceneTestCase {
runTest(FSTCompletionLookup.class, false);
}
private void runTest(Class<? extends Lookup> lookupClass,
boolean supportsExactWeights) throws Exception {
private Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
private void runTest(Class<? extends Lookup> lookupClass, boolean supportsExactWeights) throws Exception {
// Add all input keys.
Lookup lookup = lookupClass.newInstance();
Lookup lookup;
Directory tempDir = getDirectory();
if (lookupClass == TSTLookup.class) {
lookup = new TSTLookup(tempDir, "suggest");
} else if (lookupClass == FSTCompletionLookup.class) {
lookup = new FSTCompletionLookup(tempDir, "suggest");
} else {
lookup = lookupClass.newInstance();
}
Input[] keys = new Input[this.keys.length];
for (int i = 0; i < keys.length; i++)
keys[i] = new Input(this.keys[i], i);
@ -92,5 +109,6 @@ public class PersistenceTest extends LuceneTestCase {
previous = lookupResult.value;
}
}
tempDir.close();
}
}

View File

@ -26,18 +26,22 @@ import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestInputIterator extends LuceneTestCase {
public void testEmpty() throws Exception {
InputArrayIterator iterator = new InputArrayIterator(new Input[0]);
InputIterator wrapper = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
assertNull(wrapper.next());
wrapper = new UnsortedInputIterator(iterator);
assertNull(wrapper.next());
try (Directory dir = getDirectory()) {
InputIterator wrapper = new SortedInputIterator(dir, "sorted", iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
assertNull(wrapper.next());
wrapper = new UnsortedInputIterator(iterator);
assertNull(wrapper.next());
}
}
public void testTerms() throws Exception {
@ -77,45 +81,51 @@ public class TestInputIterator extends LuceneTestCase {
}
// test the sorted iterator wrapper with payloads
InputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
while (expected.hasNext()) {
Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
try (Directory tempDir = getDirectory()) {
InputIterator wrapper = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsorted), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
while (expected.hasNext()) {
Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
assertEquals(entry.getValue().getValue(), wrapper.payload());
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
assertEquals(entry.getValue().getValue(), wrapper.payload());
}
assertNull(wrapper.next());
}
assertNull(wrapper.next());
// test the sorted iterator wrapper with contexts
wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator();
while (actualEntries.hasNext()) {
Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
Set<BytesRef> actualCtxs = entry.getValue().getValue();
assertEquals(actualCtxs, wrapper.contexts());
try (Directory tempDir = getDirectory()) {
InputIterator wrapper = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsortedWithContexts), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator();
while (actualEntries.hasNext()) {
Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
Set<BytesRef> actualCtxs = entry.getValue().getValue();
assertEquals(actualCtxs, wrapper.contexts());
}
assertNull(wrapper.next());
}
assertNull(wrapper.next());
// test the sorted iterator wrapper with contexts and payload
wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithPayloadAndContext), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.entrySet().iterator();
while (expectedPayloadContextEntries.hasNext()) {
Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>> entry = expectedPayloadContextEntries.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
Set<BytesRef> actualCtxs = entry.getValue().getValue().getValue();
assertEquals(actualCtxs, wrapper.contexts());
BytesRef actualPayload = entry.getValue().getValue().getKey();
assertEquals(actualPayload, wrapper.payload());
try (Directory tempDir = getDirectory()) {
InputIterator wrapper = new SortedInputIterator(tempDir, "sorter", new InputArrayIterator(unsortedWithPayloadAndContext), comparator);
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.entrySet().iterator();
while (expectedPayloadContextEntries.hasNext()) {
Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>> entry = expectedPayloadContextEntries.next();
assertEquals(entry.getKey(), wrapper.next());
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
Set<BytesRef> actualCtxs = entry.getValue().getValue().getValue();
assertEquals(actualCtxs, wrapper.contexts());
BytesRef actualPayload = entry.getValue().getValue().getKey();
assertEquals(actualPayload, wrapper.payload());
}
assertNull(wrapper.next());
}
assertNull(wrapper.next());
// test the unsorted iterator wrapper with payloads
wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
InputIterator wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
BytesRef key;
while ((key = wrapper.next()) != null) {
@ -126,19 +136,21 @@ public class TestInputIterator extends LuceneTestCase {
assertEquals(sorted, actual);
// test the sorted iterator wrapper without payloads
InputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparator);
Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
while (expectedWithoutPayload.hasNext()) {
Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
try (Directory tempDir = getDirectory()) {
InputIterator wrapperWithoutPayload = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsortedWithoutPayload), comparator);
Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
while (expectedWithoutPayload.hasNext()) {
Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
assertEquals(entry.getKey(), wrapperWithoutPayload.next());
assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
assertNull(wrapperWithoutPayload.payload());
assertEquals(entry.getKey(), wrapperWithoutPayload.next());
assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
assertNull(wrapperWithoutPayload.payload());
}
assertNull(wrapperWithoutPayload.next());
}
assertNull(wrapperWithoutPayload.next());
// test the unsorted iterator wrapper without payloads
wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
InputIterator wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
while ((key = wrapperWithoutPayload.next()) != null) {
long value = wrapperWithoutPayload.weight();
@ -157,4 +169,12 @@ public class TestInputIterator extends LuceneTestCase {
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
}
private Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -34,8 +34,8 @@ import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
import org.apache.lucene.analysis.CannedBinaryTokenStream;
import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
@ -48,9 +48,11 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@ -58,7 +60,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class AnalyzingSuggesterTest extends LuceneTestCase {
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
Iterable<Input> keys = shuffle(
@ -71,8 +73,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("barbara", 1)
);
Directory tempDir = getDirectory();
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
@ -106,7 +110,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
public void testKeywordWithPayloads() throws Exception {
@ -119,7 +123,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("barbara", 6, new BytesRef("for all the fish")));
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
suggester.build(new InputArrayIterator(keys));
for (int i = 0; i < 2; i++) {
// top N of 2, but only foo is available
@ -160,7 +165,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(6, results.get(2).value, 0.01F);
assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
}
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
public void testRandomRealisticKeys() throws IOException {
@ -180,7 +185,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
Analyzer indexAnalyzer = new MockAnalyzer(random());
Analyzer queryAnalyzer = new MockAnalyzer(random());
AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(indexAnalyzer, queryAnalyzer,
Directory tempDir = getDirectory();
AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(tempDir, "suggest", indexAnalyzer, queryAnalyzer,
AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, random().nextBoolean());
boolean doPayloads = random().nextBoolean();
if (doPayloads) {
@ -205,7 +212,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
}
IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer);
IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer, tempDir);
}
// TODO: more tests
@ -217,8 +224,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("the ghost of christmas past", 50),
};
Directory tempDir = getDirectory();
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard, standard,
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard, standard,
AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false);
suggester.build(new InputArrayIterator(keys));
@ -240,17 +248,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals("the ghost of christmas past", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
standard.close();
IOUtils.close(standard, tempDir);
}
public void testEmpty() throws Exception {
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard);
suggester.build(new InputArrayIterator(new Input[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
standard.close();
IOUtils.close(standard, tempDir);
}
public void testNoSeps() throws Exception {
@ -262,7 +271,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
int options = 0;
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);
suggester.build(new InputArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
@ -275,7 +285,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
// complete to "abcd", which has higher weight so should
// appear first:
assertEquals("abcd", r.get(0).key.toString());
a.close();
IOUtils.close(a, tempDir);
}
public void testGraphDups() throws Exception {
@ -330,7 +340,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("wi fi network is fast", 10),
};
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
@ -341,7 +352,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(50, results.get(0).value);
assertEquals("wi fi network is fast", results.get(1).key);
assertEquals(10, results.get(1).value);
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
public void testInputPathRequired() throws Exception {
@ -396,11 +407,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("ab xc", 50),
new Input("ba xd", 50),
};
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
private static Token token(String term, int posInc, int posLength) {
@ -471,7 +483,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 1),
new Input("x y z", 3),
@ -505,13 +518,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
}
}
a.close();
IOUtils.close(a, tempDir);
}
public void testNonExactFirst() throws Exception {
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 1),
@ -543,7 +557,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
}
}
a.close();
IOUtils.close(a, tempDir);
}
// Holds surface form separately:
@ -764,7 +778,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, true);
if (doPayloads) {
suggester.build(new InputArrayIterator(shuffle(payloadKeys)));
@ -882,12 +897,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
}
}
a.close();
IOUtils.close(a, tempDir);
}
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 2, -1, true);
suggester.build(new InputArrayIterator(shuffle(new Input("a", 40),
new Input("a ", 50), new Input(" a", 60))));
@ -897,12 +913,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(60, results.get(0).value);
assertEquals("a ", results.get(1).key);
assertEquals(50, results.get(1).value);
a.close();
IOUtils.close(a, tempDir);
}
public void testQueueExhaustion() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("a", 2),
@ -912,14 +929,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}));
suggester.lookup("a", false, 4);
a.close();
IOUtils.close(a, tempDir);
}
public void testExactFirstMissingResult() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("a", 5),
@ -959,7 +977,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(4, results.get(1).value);
assertEquals("a b", results.get(2).key);
assertEquals(3, results.get(2).value);
a.close();
IOUtils.close(a, tempDir);
}
public void testDupSurfaceFormsMissingResults() throws Exception {
@ -986,7 +1004,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
suggester.build(new InputArrayIterator(shuffle(
new Input("hambone", 6),
@ -1018,7 +1037,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(6, results.get(0).value);
assertEquals("nellie", results.get(1).key);
assertEquals(5, results.get(1).value);
a.close();
IOUtils.close(a, tempDir);
}
public void testDupSurfaceFormsMissingResults2() throws Exception {
@ -1055,7 +1074,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("a", 6),
@ -1088,7 +1108,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
assertEquals(6, results.get(0).value);
assertEquals("b", results.get(1).key);
assertEquals(5, results.get(1).value);
a.close();
IOUtils.close(a, tempDir);
}
public void test0ByteKeys() throws Exception {
@ -1128,19 +1148,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("a a", 50),
new Input("a b", 50),
}));
a.close();
IOUtils.close(a, tempDir);
}
public void testDupSurfaceFormsMissingResults3() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("a a", 7),
new Input("a a", 7),
@ -1149,19 +1171,20 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
new Input("a b", 5),
}));
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
a.close();
IOUtils.close(tempDir, a);
}
public void testEndingSpace() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("i love lucy", 7),
new Input("isla de muerta", 8),
}));
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
a.close();
IOUtils.close(a, tempDir);
}
public void testTooManyExpansions() throws Exception {
@ -1188,15 +1211,17 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true);
suggester.build(new InputArrayIterator(new Input[] {new Input("a", 1)}));
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
a.close();
IOUtils.close(a, tempDir);
}
public void testIllegalLookupArgument() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("а где Люси?", 7),
}));
@ -1212,7 +1237,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
} catch (IllegalArgumentException e) {
// expected
}
a.close();
IOUtils.close(a, tempDir);
}
static final Iterable<Input> shuffle(Input...values) {
@ -1227,7 +1252,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
// TODO: we need BaseSuggesterTestCase?
public void testTooLongSuggestion() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a);
String bigString = TestUtil.randomSimpleString(random(), 30000, 30000);
try {
suggester.build(new InputArrayIterator(new Input[] {
@ -1238,6 +1264,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
} catch (IllegalArgumentException iae) {
// expected
}
a.close();
IOUtils.close(a, tempDir);
}
private Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -43,8 +43,11 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -62,7 +65,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
keys.add(new Input("foo bar boo far", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
@ -73,7 +77,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals("foo bar boo far", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
}
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
public void testNonLatinRandomEdits() throws IOException {
@ -84,7 +88,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
keys.add(new Input("фуу бар буу фар", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
@ -95,7 +100,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals("фуу бар буу фар", results.get(0).key.toString());
assertEquals(12, results.get(0).value, 0.01F);
}
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
@ -108,7 +113,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
};
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("bariar", random()), false, 2);
@ -171,7 +177,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
/**
@ -183,7 +189,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
suggester.build(new InputArrayIterator(keys));
@ -204,7 +211,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals("the ghost of christmas past", results.get(0).key.toString());
assertEquals(50, results.get(0).value, 0.01F);
standard.close();
IOUtils.close(standard, tempDir);
}
public void testNoSeps() throws Exception {
@ -216,7 +223,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
int options = 0;
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, true, 1, true, 1, 3, false);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",a, a, options, 256, -1, true, 1, true, 1, 3, false);
suggester.build(new InputArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
@ -229,7 +237,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
// complete to "abcd", which has higher weight so should
// appear first:
assertEquals("abcd", r.get(0).key.toString());
a.close();
IOUtils.close(a, tempDir);
}
public void testGraphDups() throws Exception {
@ -283,7 +291,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
new Input("wifi network is slow", 50),
new Input("wi fi network is fast", 10),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
@ -295,17 +304,18 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals(50, results.get(0).value);
assertEquals("wi fi network is fast", results.get(1).key);
assertEquals(10, results.get(1).value);
analyzer.close();
IOUtils.close(tempDir, analyzer);
}
public void testEmpty() throws Exception {
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
suggester.build(new InputArrayIterator(new Input[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
public void testInputPathRequired() throws Exception {
@ -360,11 +370,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
new Input("ab xc", 50),
new Input("ba xd", 50),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
analyzer.close();
IOUtils.close(analyzer, tempDir);
}
private static Token token(String term, int posInc, int posLength) {
@ -430,7 +441,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testExactFirst() throws Exception {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 1),
new Input("x y z", 3),
@ -464,13 +476,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
}
}
a.close();
IOUtils.close(a, tempDir);
}
public void testNonExactFirst() throws Exception {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 1),
@ -502,7 +515,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
}
}
a.close();
IOUtils.close(a, tempDir);
}
// Holds surface form separately:
@ -697,7 +710,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
FuzzySuggester suggester = new FuzzySuggester(a, a,
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, true, 1, false, 1, 3, unicodeAware);
suggester.build(new InputArrayIterator(keys));
@ -836,12 +850,13 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
}
}
a.close();
IOUtils.close(a, tempDir);
}
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, true, 1, true, 1, 3, false);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 2, -1, true, 1, true, 1, 3, false);
List<Input> keys = Arrays.asList(new Input[] {
new Input("a", 40),
@ -858,12 +873,13 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals(60, results.get(0).value);
assertEquals("a ", results.get(1).key);
assertEquals(50, results.get(1).value);
a.close();
IOUtils.close(a, tempDir);
}
public void testEditSeps() throws Exception {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);
Directory tempDir = getDirectory();
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);
List<Input> keys = Arrays.asList(new Input[] {
new Input("foo bar", 40),
@ -879,7 +895,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
assertEquals("[barbaz/60, barbazfoo/10]", suggester.lookup("bar baz", false, 5).toString());
assertEquals("[barbazfoo/10]", suggester.lookup("bar baz foo", false, 5).toString());
a.close();
IOUtils.close(a, tempDir);
}
@SuppressWarnings("fallthrough")
@ -978,7 +994,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
boolean transpositions = random().nextBoolean();
// TODO: test graph analyzers
// TODO: test exactFirst / preserveSep permutations
FuzzySuggester suggest = new FuzzySuggester(a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
Directory tempDir = getDirectory();
FuzzySuggester suggest = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
if (VERBOSE) {
System.out.println("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
@ -1022,7 +1039,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
assertEquals(expected.size(), actual.size());
}
a.close();
IOUtils.close(a, tempDir);
}
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
@ -1194,4 +1211,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
return ref;
}
private Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -18,8 +18,11 @@ package org.apache.lucene.search.suggest.fst;
*/
import org.apache.lucene.search.suggest.InMemorySorter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.OfflineSorter;
import org.junit.Test;
@ -27,9 +30,13 @@ import org.junit.Test;
public class BytesRefSortersTest extends LuceneTestCase {
@Test
public void testExternalRefSorter() throws Exception {
ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter());
Directory tempDir = newDirectory();
if (tempDir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
}
ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter(tempDir, "temp"));
check(s);
s.close();
IOUtils.close(s, tempDir);
}
@Test

View File

@ -20,15 +20,18 @@ package org.apache.lucene.search.suggest.fst;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.fst.FSTCompletion.Completion;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.*;
/**
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
public static Input tf(String t, int v) {
return new Input(t, v);
}
@ -155,7 +158,8 @@ public class FSTCompletionTest extends LuceneTestCase {
}
public void testLargeInputConstantWeights() throws Exception {
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
Directory tempDir = getDirectory();
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst", 10, true);
Random r = random();
List<Input> keys = new ArrayList<>();
@ -175,12 +179,14 @@ public class FSTCompletionTest extends LuceneTestCase {
}
previous = current;
}
tempDir.close();
}
public void testMultilingualInput() throws Exception {
List<Input> input = LookupBenchmarkTest.readTop50KWiki();
FSTCompletionLookup lookup = new FSTCompletionLookup();
Directory tempDir = getDirectory();
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
lookup.build(new InputArrayIterator(input));
assertEquals(input.size(), lookup.getCount());
for (Input tf : input) {
@ -192,6 +198,7 @@ public class FSTCompletionTest extends LuceneTestCase {
assertEquals(5, result.size());
assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
assertTrue(result.get(1).key.toString().equals("with")); // highest count.
tempDir.close();
}
public void testEmptyInput() throws Exception {
@ -207,7 +214,8 @@ public class FSTCompletionTest extends LuceneTestCase {
freqs.add(new Input("" + rnd.nextLong(), weight));
}
FSTCompletionLookup lookup = new FSTCompletionLookup();
Directory tempDir = getDirectory();
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
for (Input tf : freqs) {
@ -219,6 +227,7 @@ public class FSTCompletionTest extends LuceneTestCase {
}
}
}
tempDir.close();
}
private CharSequence stringToCharSequence(String prefix) {
@ -262,4 +271,12 @@ public class FSTCompletionTest extends LuceneTestCase {
len = Math.max(len, s.length());
return len;
}
private Directory getDirectory() {
Directory dir = newDirectory();
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
}
return dir;
}
}

View File

@ -1,66 +0,0 @@
package org.apache.lucene.search.suggest.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.OfflineSorter;
/**
* Try to build a suggester from a large data set. The input is a simple text
* file, newline-delimited.
*/
public class LargeInputFST {
public static void main(String[] args) throws IOException {
Path input = Paths.get("/home/dweiss/tmp/shuffled.dict");
int buckets = 20;
int shareMaxTail = 10;
ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);
BufferedReader reader = Files.newBufferedReader(input, StandardCharsets.UTF_8);
BytesRefBuilder scratch = new BytesRefBuilder();
String line;
int count = 0;
while ((line = reader.readLine()) != null) {
scratch.copyChars(line);
builder.add(scratch.get(), count % buckets);
if ((count++ % 100000) == 0) {
System.err.println("Line: " + count);
}
}
System.out.println("Building FSTCompletion.");
FSTCompletion completion = builder.build();
Path fstFile = Paths.get("completion.fst");
System.out.println("Done. Writing automaton: " + fstFile.toAbsolutePath());
completion.getFST().save(fstFile);
sorter.close();
}
}

View File

@ -19,9 +19,11 @@ package org.apache.lucene.search.suggest.fst;
import java.util.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -37,7 +39,8 @@ public class WFSTCompletionTest extends LuceneTestCase {
};
Random random = new Random(random().nextLong());
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst");
suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
@ -75,11 +78,13 @@ public class WFSTCompletionTest extends LuceneTestCase {
assertEquals(10, results.get(1).value, 0.01F);
assertEquals("barbara", results.get(2).key.toString());
assertEquals(6, results.get(2).value, 0.01F);
tempDir.close();
}
public void testExactFirst() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", true);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 20),
@ -99,11 +104,13 @@ public class WFSTCompletionTest extends LuceneTestCase {
assertEquals(20, results.get(1).value);
}
}
tempDir.close();
}
public void testNonExactFirst() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[] {
new Input("x y", 20),
@ -123,6 +130,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
assertEquals(2, results.get(1).value);
}
}
tempDir.close();
}
public void testRandom() throws Exception {
@ -153,7 +161,8 @@ public class WFSTCompletionTest extends LuceneTestCase {
keys[i] = new Input(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(keys));
assertEquals(numWords, suggester.getCount());
@ -196,6 +205,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
}
}
tempDir.close();
}
public void test0ByteKeys() throws Exception {
@ -204,20 +214,32 @@ public class WFSTCompletionTest extends LuceneTestCase {
BytesRef key2 = new BytesRef(3);
key1.length = 3;
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[] {
new Input(key1, 50),
new Input(key2, 50),
}));
tempDir.close();
}
public void testEmpty() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
Directory tempDir = getDirectory();
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
suggester.build(new InputArrayIterator(new Input[0]));
assertEquals(0, suggester.getCount());
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
tempDir.close();
}
/**
 * Returns a fresh test {@link Directory}; when the random directory is a
 * {@link MockDirectoryWrapper}, its virus-scanner emulation is switched off.
 */
private Directory getDirectory() {
  Directory result = newDirectory();
  if (result instanceof MockDirectoryWrapper) {
    MockDirectoryWrapper mock = (MockDirectoryWrapper) result;
    mock.setEnableVirusScanner(false);
  }
  return result;
}
}

View File

@ -23,9 +23,12 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.CRC32;
@ -1165,5 +1168,26 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
in.close(); // close again
dir.close();
}
}
/**
 * Writes a distinct vInt to many temp outputs created via
 * {@code Directory.createTempOutput}, then verifies each file can be reopened
 * by its reported name with the expected contents, and that the directory
 * listing contains exactly the generated names.
 */
public void testCreateTempOutput() throws Throwable {
  Directory dir = getDirectory(createTempDir());

  int count = atLeast(50);
  List<String> tempNames = new ArrayList<>();

  // Create the temp outputs, remembering the name each one was given.
  for (int i = 0; i < count; i++) {
    IndexOutput tempOut = dir.createTempOutput("foo", "bar", newIOContext(random()));
    tempNames.add(tempOut.getName());
    tempOut.writeVInt(i);
    tempOut.close();
  }

  // Re-open each file by its reported name and check its payload.
  for (int i = 0; i < count; i++) {
    IndexInput tempIn = dir.openInput(tempNames.get(i), newIOContext(random()));
    assertEquals(i, tempIn.readVInt());
    tempIn.close();
  }

  Set<String> actual = new HashSet<>(Arrays.asList(dir.listAll()));
  // In case ExtraFS struck:
  actual.remove("extra0");
  assertEquals(new HashSet<>(tempNames), actual);

  dir.close();
}
}

View File

@ -39,7 +39,7 @@ public class MockIndexOutputWrapper extends IndexOutput {
/** Construct an empty output buffer. */
public MockIndexOutputWrapper(MockDirectoryWrapper dir, IndexOutput delegate, String name) {
super("MockIndexOutputWrapper(" + delegate + ")");
super("MockIndexOutputWrapper(" + delegate + ")", delegate.getName());
this.dir = dir;
this.name = name;
this.delegate = delegate;

View File

@ -1,5 +1,22 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.net.URI;
import java.nio.file.FileSystem;
@ -30,23 +47,6 @@ import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import com.carrotsearch.randomizedtesting.RandomizedContext;
import com.carrotsearch.randomizedtesting.rules.TestRuleAdapter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Checks and cleans up temporary files.
*
@ -113,9 +113,6 @@ final class TestRuleTemporaryFilesCleanup extends TestRuleAdapter {
assert tempDirBase == null;
fileSystem = initializeFileSystem();
javaTempDir = initializeJavaTempDir();
// So all code using OfflineSorter (suggesters, BKD tree, NumericRangeTree) see MockFS goodness, e.g. catching leaked file handles:
OfflineSorter.setDefaultTempDir(javaTempDir);
}
// os/config-independent limit for too many open files
@ -236,7 +233,7 @@ final class TestRuleTemporaryFilesCleanup extends TestRuleAdapter {
}
}
final Path getPerTestClassTempDir() {
Path getPerTestClassTempDir() {
if (tempDirBase == null) {
RandomizedContext ctx = RandomizedContext.current();
Class<?> clazz = ctx.getTargetClass();

View File

@ -16,6 +16,7 @@ package org.apache.lucene.util;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.DataInput;
@ -59,7 +60,7 @@ public class ThrottledIndexOutput extends IndexOutput {
public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis,
long closeDelayMillis, long seekDelayMillis, long minBytesWritten,
IndexOutput delegate) {
super("ThrottledIndexOutput(" + delegate + ")");
super("ThrottledIndexOutput(" + delegate + ")", delegate == null ? "n/a" : delegate.getName());
assert bytesPerSecond > 0;
this.delegate = delegate;
this.bytesPerSecond = bytesPerSecond;
@ -117,8 +118,9 @@ public class ThrottledIndexOutput extends IndexOutput {
}
private static final void sleep(long ms) {
if (ms <= 0)
if (ms <= 0) {
return;
}
try {
Thread.sleep(ms);
} catch (InterruptedException e) {

View File

@ -17,7 +17,11 @@ package org.apache.solr.spelling.suggest;
* limitations under the License.
*/
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
@ -41,4 +45,23 @@ public abstract class LookupFactory {
* <b>NOTE:</b> not all {@link Lookup} implementations store in-memory data structures
* */
public abstract String storeFileName();
/** Non-null if this suggester created a temp dir, needed only during build. */
private static FSDirectory tmpBuildDir;

/**
 * Returns a lazily-created, process-wide {@code FSDirectory} over the JVM's
 * temporary folder (system property {@code java.io.tmpdir}), used as scratch
 * space while building suggester data structures.
 *
 * <p>Synchronized so concurrent factories share a single instance.
 *
 * @throws RuntimeException if {@code java.io.tmpdir} is not set, or wrapping
 *         the {@code IOException} if the directory cannot be opened
 */
protected static synchronized FSDirectory getTempDir() {
  if (tmpBuildDir == null) {
    // Lazy init
    String tempDirPath = System.getProperty("java.io.tmpdir");
    if (tempDirPath == null) {
      throw new RuntimeException("Java has no temporary folder property (java.io.tmpdir)?");
    }
    try {
      tmpBuildDir = FSDirectory.open(Paths.get(tempDirPath));
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }
  return tmpBuildDir;
}
}

View File

@ -120,8 +120,7 @@ public class AnalyzingLookupFactory extends LookupFactory {
? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString())
: false;
return new AnalyzingSuggester(indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm,
return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm,
maxGraphExpansions, preservePositionIncrements);
}

View File

@ -60,7 +60,7 @@ public class FSTLookupFactory extends LookupFactory {
? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
: true;
return new FSTCompletionLookup(buckets, exactMatchFirst);
return new FSTCompletionLookup(getTempDir(), "suggester", buckets, exactMatchFirst);
}
@Override

View File

@ -134,7 +134,7 @@ public class FuzzyLookupFactory extends LookupFactory {
? Boolean.valueOf(params.get(UNICODE_AWARE).toString())
: FuzzySuggester.DEFAULT_UNICODE_AWARE;
return new FuzzySuggester(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm,
return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm,
maxGraphExpansions, preservePositionIncrements, maxEdits, transpositions, nonFuzzyPrefix,
minFuzzyLength, unicodeAware);
}

View File

@ -17,8 +17,6 @@ package org.apache.solr.spelling.suggest.fst;
* limitations under the License.
*/
import java.io.File;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.fst.*;
import org.apache.solr.common.util.NamedList;
@ -48,7 +46,7 @@ public class WFSTLookupFactory extends LookupFactory {
? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
: true;
return new WFSTCompletionLookup(exactMatchFirst);
return new WFSTCompletionLookup(getTempDir(), "suggester", exactMatchFirst);
}
@Override

View File

@ -31,7 +31,7 @@ public class TSTLookupFactory extends LookupFactory {
@Override
public Lookup create(NamedList params, SolrCore core) {
return new TSTLookup();
return new TSTLookup(getTempDir(), "suggester");
}
@Override

View File

@ -37,7 +37,7 @@ public class CachedIndexOutput extends ReusedBufferedIndexOutput {
public CachedIndexOutput(BlockDirectory directory, IndexOutput dest,
int blockSize, String name, Cache cache, int bufferSize) {
super("dest=" + dest + " name=" + name, bufferSize);
super("dest=" + dest + " name=" + name, name, bufferSize);
this.directory = directory;
this.dest = dest;
this.blockSize = blockSize;

View File

@ -43,12 +43,12 @@ public abstract class ReusedBufferedIndexOutput extends IndexOutput {
private final Store store;
public ReusedBufferedIndexOutput(String resourceDescription) {
this(resourceDescription, BUFFER_SIZE);
public ReusedBufferedIndexOutput(String resourceDescription, String name) {
this(resourceDescription, name, BUFFER_SIZE);
}
public ReusedBufferedIndexOutput(String resourceDescription, int bufferSize) {
super(resourceDescription);
public ReusedBufferedIndexOutput(String resourceDescription, String name, int bufferSize) {
super(resourceDescription, name);
checkBufferSize(bufferSize);
this.bufferSize = bufferSize;
store = BufferStore.instance(bufferSize);

View File

@ -107,7 +107,12 @@ public class HdfsDirectory extends BaseDirectory {
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
return new HdfsFileWriter(getFileSystem(), new Path(hdfsDirPath, name));
return new HdfsFileWriter(getFileSystem(), new Path(hdfsDirPath, name), name);
}
@Override
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
throw new UnsupportedOperationException();
}
private String[] getNormalNames(List<String> files) {

View File

@ -37,8 +37,8 @@ public class HdfsFileWriter extends OutputStreamIndexOutput {
public static final String HDFS_SYNC_BLOCK = "solr.hdfs.sync.block";
public static final int BUFFER_SIZE = 16384;
public HdfsFileWriter(FileSystem fileSystem, Path path) throws IOException {
super("fileSystem=" + fileSystem + " path=" + path, getOutputStream(fileSystem, path), BUFFER_SIZE);
public HdfsFileWriter(FileSystem fileSystem, Path path, String name) throws IOException {
super("fileSystem=" + fileSystem + " path=" + path, name, getOutputStream(fileSystem, path), BUFFER_SIZE);
}
private static final OutputStream getOutputStream(FileSystem fileSystem, Path path) throws IOException {