mirror of https://github.com/apache/lucene.git
LUCENE-6829: OfflineSorter now uses Directory API; add Directory.createTempOutput and IndexOutput.getName
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1708760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
673654321b
commit
c855dd5a33
|
@ -59,6 +59,13 @@ API Changes
|
|||
* LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
|
||||
Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
|
||||
|
||||
* LUCENE-6829: OfflineSorter, and the classes that use it (suggesters,
|
||||
hunspell) now do all temporary file IO via Directory instead of
|
||||
directly through Java's temp dir. Directory.createTempOutput
|
||||
creates a uniquely named IndexOutput, and the new
|
||||
IndexOutput.getName returns its name. (Dawid Weiss, Robert Muir, Mike
|
||||
McCandless)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
|
||||
|
|
|
@ -17,7 +17,39 @@ package org.apache.lucene.analysis.hunspell;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -39,34 +71,6 @@ import org.apache.lucene.util.fst.IntSequenceOutputs;
|
|||
import org.apache.lucene.util.fst.Outputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* In-memory structure for the dictionary (.dic) and affix (.aff)
|
||||
* data of a hunspell dictionary.
|
||||
|
@ -139,7 +143,7 @@ public class Dictionary {
|
|||
// when set, some words have exceptional stems, and the last entry is a pointer to stemExceptions
|
||||
boolean hasStemExceptions;
|
||||
|
||||
private final Path tempDir = OfflineSorter.getDefaultTempDir(); // TODO: make this configurable?
|
||||
private final Path tempPath = getDefaultTempDir(); // TODO: make this configurable?
|
||||
|
||||
boolean ignoreCase;
|
||||
boolean complexPrefixes;
|
||||
|
@ -167,19 +171,21 @@ public class Dictionary {
|
|||
String language;
|
||||
// true if case algorithms should use alternate (Turkish/Azeri) mapping
|
||||
boolean alternateCasing;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
|
||||
* and dictionary files.
|
||||
* You have to close the provided InputStreams yourself.
|
||||
*
|
||||
* @param tempDir Directory to use for offline sorting
|
||||
* @param tempFileNamePrefix prefix to use to generate temp file names
|
||||
* @param affix InputStream for reading the hunspell affix file (won't be closed).
|
||||
* @param dictionary InputStream for reading the hunspell dictionary file (won't be closed).
|
||||
* @throws IOException Can be thrown while reading from the InputStreams
|
||||
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
|
||||
*/
|
||||
public Dictionary(InputStream affix, InputStream dictionary) throws IOException, ParseException {
|
||||
this(affix, Collections.singletonList(dictionary), false);
|
||||
public Dictionary(Directory tempDir, String tempFileNamePrefix, InputStream affix, InputStream dictionary) throws IOException, ParseException {
|
||||
this(tempDir, tempFileNamePrefix, affix, Collections.singletonList(dictionary), false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -187,18 +193,20 @@ public class Dictionary {
|
|||
* and dictionary files.
|
||||
* You have to close the provided InputStreams yourself.
|
||||
*
|
||||
* @param tempDir Directory to use for offline sorting
|
||||
* @param tempFileNamePrefix prefix to use to generate temp file names
|
||||
* @param affix InputStream for reading the hunspell affix file (won't be closed).
|
||||
* @param dictionaries InputStreams for reading the hunspell dictionary files (won't be closed).
|
||||
* @throws IOException Can be thrown while reading from the InputStreams
|
||||
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
|
||||
*/
|
||||
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
|
||||
public Dictionary(Directory tempDir, String tempFileNamePrefix, InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
|
||||
this.ignoreCase = ignoreCase;
|
||||
this.needsInputCleaning = ignoreCase;
|
||||
this.needsOutputCleaning = false; // set if we have an OCONV
|
||||
flagLookup.add(new BytesRef()); // no flags -> ord 0
|
||||
|
||||
Path aff = Files.createTempFile(tempDir, "affix", "aff");
|
||||
Path aff = Files.createTempFile(tempPath, "affix", "aff");
|
||||
OutputStream out = new BufferedOutputStream(Files.newOutputStream(aff));
|
||||
InputStream aff1 = null;
|
||||
InputStream aff2 = null;
|
||||
|
@ -224,7 +232,7 @@ public class Dictionary {
|
|||
// read dictionary entries
|
||||
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
|
||||
Builder<IntsRef> b = new Builder<>(FST.INPUT_TYPE.BYTE4, o);
|
||||
readDictionaryFiles(dictionaries, decoder, b);
|
||||
readDictionaryFiles(tempDir, tempFileNamePrefix, dictionaries, decoder, b);
|
||||
words = b.finish();
|
||||
aliases = null; // no longer needed
|
||||
morphAliases = null; // no longer needed
|
||||
|
@ -766,7 +774,7 @@ public class Dictionary {
|
|||
return Math.max(pos1, pos2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reads the dictionary file through the provided InputStreams, building up the words map
|
||||
*
|
||||
|
@ -774,13 +782,13 @@ public class Dictionary {
|
|||
* @param decoder CharsetDecoder used to decode the contents of the file
|
||||
* @throws IOException Can be thrown while reading from the file
|
||||
*/
|
||||
private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException {
|
||||
private void readDictionaryFiles(Directory tempDir, String tempFileNamePrefix, List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words) throws IOException {
|
||||
BytesRefBuilder flagsScratch = new BytesRefBuilder();
|
||||
IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
Path unsorted = Files.createTempFile(tempDir, "unsorted", "dat");
|
||||
|
||||
IndexOutput unsorted = tempDir.createTempOutput(tempFileNamePrefix, "dat", IOContext.DEFAULT);
|
||||
try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
|
||||
for (InputStream dictionary : dictionaries) {
|
||||
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
|
||||
|
@ -823,9 +831,8 @@ public class Dictionary {
|
|||
}
|
||||
}
|
||||
}
|
||||
Path sorted = Files.createTempFile(tempDir, "sorted", "dat");
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, new Comparator<BytesRef>() {
|
||||
BytesRef scratch1 = new BytesRef();
|
||||
BytesRef scratch2 = new BytesRef();
|
||||
|
||||
|
@ -862,21 +869,23 @@ public class Dictionary {
|
|||
}
|
||||
}
|
||||
});
|
||||
|
||||
String sorted;
|
||||
boolean success = false;
|
||||
try {
|
||||
sorter.sort(unsorted, sorted);
|
||||
sorted = sorter.sort(unsorted.getName());
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
Files.delete(unsorted);
|
||||
tempDir.deleteFile(unsorted.getName());
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(unsorted);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, unsorted.getName());
|
||||
}
|
||||
}
|
||||
|
||||
boolean success2 = false;
|
||||
ByteSequencesReader reader = new ByteSequencesReader(sorted);
|
||||
try {
|
||||
|
||||
try (ByteSequencesReader reader = new ByteSequencesReader(tempDir.openInput(sorted, IOContext.READONCE))) {
|
||||
BytesRefBuilder scratchLine = new BytesRefBuilder();
|
||||
|
||||
// TODO: the flags themselves can be double-chars (long) or also numeric
|
||||
|
@ -956,11 +965,10 @@ public class Dictionary {
|
|||
words.add(scratchInts.get(), currentOrds.get());
|
||||
success2 = true;
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(reader);
|
||||
if (success2) {
|
||||
Files.delete(sorted);
|
||||
tempDir.deleteFile(sorted);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorted);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sorted);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1245,4 +1253,33 @@ public class Dictionary {
|
|||
public boolean getIgnoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
||||
private static Path DEFAULT_TEMP_DIR;
|
||||
|
||||
/** Used by test framework */
|
||||
public static void setDefaultTempDir(Path tempDir) {
|
||||
DEFAULT_TEMP_DIR = tempDir;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default temporary directory. By default, java.io.tmpdir. If not accessible
|
||||
* or not available, an IOException is thrown
|
||||
*/
|
||||
synchronized static Path getDefaultTempDir() throws IOException {
|
||||
if (DEFAULT_TEMP_DIR == null) {
|
||||
// Lazy init
|
||||
String tempDirPath = System.getProperty("java.io.tmpdir");
|
||||
if (tempDirPath == null) {
|
||||
throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
|
||||
}
|
||||
Path tempDirectory = Paths.get(tempDirPath);
|
||||
if (Files.isWritable(tempDirectory) == false) {
|
||||
throw new IOException("Java's temporary folder not present or writeable?: "
|
||||
+ tempDirectory.toAbsolutePath());
|
||||
}
|
||||
DEFAULT_TEMP_DIR = tempDirectory;
|
||||
}
|
||||
|
||||
return DEFAULT_TEMP_DIR;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.lucene.analysis.hunspell;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -28,6 +30,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
@ -48,6 +52,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
private static final String PARAM_DICTIONARY = "dictionary";
|
||||
private static final String PARAM_AFFIX = "affix";
|
||||
// NOTE: this one is currently unused?:
|
||||
private static final String PARAM_RECURSION_CAP = "recursionCap";
|
||||
private static final String PARAM_IGNORE_CASE = "ignoreCase";
|
||||
private static final String PARAM_LONGEST_ONLY = "longestOnly";
|
||||
|
@ -91,7 +96,12 @@ public class HunspellStemFilterFactory extends TokenFilterFactory implements Res
|
|||
}
|
||||
affix = loader.openResource(affixFile);
|
||||
|
||||
this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
|
||||
Path tempPath = Files.createTempDirectory(Dictionary.getDefaultTempDir(), "Hunspell");
|
||||
try (Directory tempDir = FSDirectory.open(tempPath)) {
|
||||
this.dictionary = new Dictionary(tempDir, "hunspell", affix, dictionaries, ignoreCase);
|
||||
} finally {
|
||||
IOUtils.rm(tempPath);
|
||||
}
|
||||
} catch (ParseException e) {
|
||||
throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
|
||||
} finally {
|
||||
|
|
|
@ -85,6 +85,7 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
|
|||
import org.apache.lucene.analysis.util.CharArrayMap;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
|
@ -435,7 +436,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
|
||||
InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
|
||||
try {
|
||||
return new Dictionary(affixStream, dictStream);
|
||||
return new Dictionary(new RAMDirectory(), "dictionary", affixStream, dictStream);
|
||||
} catch (Exception ex) {
|
||||
Rethrow.rethrow(ex);
|
||||
return null; // unreachable code
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.text.ParseException;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -61,7 +62,7 @@ public abstract class StemmerTestBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
try {
|
||||
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
|
||||
Dictionary dictionary = new Dictionary(new RAMDirectory(), "dictionary", affixStream, Arrays.asList(dictStreams), ignoreCase);
|
||||
stemmer = new Stemmer(dictionary);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affixStream);
|
||||
|
|
|
@ -24,6 +24,8 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -51,8 +53,11 @@ public class Test64kAffixes extends LuceneTestCase {
|
|||
dictWriter.write("1\ndrink/2\n");
|
||||
dictWriter.close();
|
||||
|
||||
try (InputStream affStream = Files.newInputStream(affix); InputStream dictStream = Files.newInputStream(dict)) {
|
||||
Dictionary dictionary = new Dictionary(affStream, dictStream);
|
||||
try (InputStream affStream = Files.newInputStream(affix); InputStream dictStream = Files.newInputStream(dict); Directory tempDir2 = newDirectory()) {
|
||||
if (tempDir2 instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) tempDir2).setEnableVirusScanner(false);
|
||||
}
|
||||
Dictionary dictionary = new Dictionary(tempDir2, "dictionary", affStream, dictStream);
|
||||
Stemmer stemmer = new Stemmer(dictionary);
|
||||
// drinks should still stem to drink
|
||||
List<CharsRef> stems = stemmer.stem("drinks");
|
||||
|
|
|
@ -22,7 +22,8 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
|
@ -165,14 +166,14 @@ public class TestAllDictionaries extends LuceneTestCase {
|
|||
IOUtils.rm(tmp);
|
||||
Files.createDirectory(tmp);
|
||||
|
||||
try (InputStream in = Files.newInputStream(f)) {
|
||||
try (InputStream in = Files.newInputStream(f); Directory tempDir = getDirectory()) {
|
||||
TestUtil.unzip(in, tmp);
|
||||
Path dicEntry = tmp.resolve(tests[i+1]);
|
||||
Path affEntry = tmp.resolve(tests[i+2]);
|
||||
|
||||
try (InputStream dictionary = Files.newInputStream(dicEntry);
|
||||
InputStream affix = Files.newInputStream(affEntry)) {
|
||||
Dictionary dic = new Dictionary(affix, dictionary);
|
||||
Dictionary dic = new Dictionary(tempDir, "dictionary", affix, dictionary);
|
||||
System.out.println(tests[i] + "\t" + RamUsageTester.humanSizeOf(dic) + "\t(" +
|
||||
"words=" + RamUsageTester.humanSizeOf(dic.words) + ", " +
|
||||
"flags=" + RamUsageTester.humanSizeOf(dic.flagLookup) + ", " +
|
||||
|
@ -204,11 +205,20 @@ public class TestAllDictionaries extends LuceneTestCase {
|
|||
Path affEntry = tmp.resolve(tests[i+2]);
|
||||
|
||||
try (InputStream dictionary = Files.newInputStream(dicEntry);
|
||||
InputStream affix = Files.newInputStream(affEntry)) {
|
||||
new Dictionary(affix, dictionary);
|
||||
InputStream affix = Files.newInputStream(affEntry);
|
||||
Directory tempDir = getDirectory()) {
|
||||
new Dictionary(tempDir, "dictionary", affix, dictionary);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,12 +22,13 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
import org.apache.lucene.util.RamUsageTester;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
|
@ -186,8 +187,12 @@ public class TestAllDictionaries2 extends LuceneTestCase {
|
|||
Path affEntry = tmp.resolve(tests[i+2]);
|
||||
|
||||
try (InputStream dictionary = Files.newInputStream(dicEntry);
|
||||
InputStream affix = Files.newInputStream(affEntry)) {
|
||||
Dictionary dic = new Dictionary(affix, dictionary);
|
||||
InputStream affix = Files.newInputStream(affEntry);
|
||||
Directory tempDir = newDirectory()) {
|
||||
if (tempDir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
|
||||
}
|
||||
Dictionary dic = new Dictionary(tempDir, "dictionary", affix, dictionary);
|
||||
System.out.println(tests[i] + "\t" + RamUsageTester.humanSizeOf(dic) + "\t(" +
|
||||
"words=" + RamUsageTester.humanSizeOf(dic.words) + ", " +
|
||||
"flags=" + RamUsageTester.humanSizeOf(dic.flagLookup) + ", " +
|
||||
|
@ -219,8 +224,12 @@ public class TestAllDictionaries2 extends LuceneTestCase {
|
|||
Path affEntry = tmp.resolve(tests[i+2]);
|
||||
|
||||
try (InputStream dictionary = Files.newInputStream(dicEntry);
|
||||
InputStream affix = Files.newInputStream(affEntry)) {
|
||||
new Dictionary(affix, dictionary);
|
||||
InputStream affix = Files.newInputStream(affEntry);
|
||||
Directory tempDir = newDirectory()) {
|
||||
if (tempDir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
|
||||
}
|
||||
new Dictionary(tempDir, "dictionary", affix, dictionary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,9 +24,10 @@ import java.io.InputStream;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -41,8 +42,9 @@ public class TestDictionary extends LuceneTestCase {
|
|||
public void testSimpleDictionary() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("simple.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
Dictionary dictionary = new Dictionary(affixStream, dictStream);
|
||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
|
||||
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
|
||||
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
|
||||
|
@ -63,13 +65,15 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testCompressedDictionary() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
||||
|
||||
Dictionary dictionary = new Dictionary(affixStream, dictStream);
|
||||
Directory tempDir = getDirectory();
|
||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
|
||||
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
|
||||
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
|
||||
|
@ -80,13 +84,15 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testCompressedBeforeSetDictionary() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
Dictionary dictionary = new Dictionary(affixStream, dictStream);
|
||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
|
||||
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
|
||||
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
|
||||
|
@ -97,13 +103,15 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testCompressedEmptyAliasDictionary() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
Dictionary dictionary = new Dictionary(affixStream, dictStream);
|
||||
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
|
||||
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
|
||||
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
|
||||
|
@ -114,15 +122,17 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
// malformed rule causes ParseException
|
||||
public void testInvalidData() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("broken.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
try {
|
||||
new Dictionary(affixStream, dictStream);
|
||||
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
fail("didn't get expected exception");
|
||||
} catch (ParseException expected) {
|
||||
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
|
||||
|
@ -131,15 +141,17 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
// malformed flags causes ParseException
|
||||
public void testInvalidFlags() throws Exception {
|
||||
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
|
||||
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
try {
|
||||
new Dictionary(affixStream, dictStream);
|
||||
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
fail("didn't get expected exception");
|
||||
} catch (Exception expected) {
|
||||
assertTrue(expected.getMessage().startsWith("expected only one flag"));
|
||||
|
@ -147,6 +159,7 @@ public class TestDictionary extends LuceneTestCase {
|
|||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
private class CloseCheckInputStream extends FilterInputStream {
|
||||
|
@ -170,21 +183,22 @@ public class TestDictionary extends LuceneTestCase {
|
|||
public void testResourceCleanup() throws Exception {
|
||||
CloseCheckInputStream affixStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.aff"));
|
||||
CloseCheckInputStream dictStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.dic"));
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
new Dictionary(affixStream, dictStream);
|
||||
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
|
||||
assertFalse(affixStream.isClosed());
|
||||
assertFalse(dictStream.isClosed());
|
||||
|
||||
affixStream.close();
|
||||
dictStream.close();
|
||||
tempDir.close();
|
||||
|
||||
assertTrue(affixStream.isClosed());
|
||||
assertTrue(dictStream.isClosed());
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void testReplacements() throws Exception {
|
||||
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
|
||||
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
|
||||
|
@ -244,4 +258,12 @@ public class TestDictionary extends LuceneTestCase {
|
|||
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
|
||||
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,10 +27,10 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -43,11 +43,14 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
|
|||
// no multiple try-with to workaround bogus VerifyError
|
||||
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
|
||||
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
try {
|
||||
dictionary = new Dictionary(affixStream, dictStream);
|
||||
dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affixStream, dictStream);
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
@ -107,8 +110,9 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
|
|||
// no multiple try-with to workaround bogus VerifyError
|
||||
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
|
||||
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
|
||||
Directory tempDir = getDirectory();
|
||||
try {
|
||||
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
|
||||
d = new Dictionary(tempDir, "dictionary", affixStream, Collections.singletonList(dictStream), true);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(affixStream, dictStream);
|
||||
}
|
||||
|
@ -121,5 +125,14 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
|
|||
};
|
||||
checkOneTerm(a, "NoChAnGy", "NoChAnGy");
|
||||
a.close();
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
private static Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -140,6 +140,9 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
|
|||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); }
|
||||
|
||||
/** Not supported by this directory: always throws {@link UnsupportedOperationException}. */
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) { throw new UnsupportedOperationException(); }
|
||||
|
|
|
@ -29,12 +29,12 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Class for accessing a compound stream.
|
||||
|
@ -172,6 +172,11 @@ final class Lucene50CompoundReader extends Directory {
|
|||
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) {
|
||||
|
|
|
@ -32,8 +32,8 @@ import java.util.Iterator;
|
|||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
@ -57,10 +57,10 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.lucene.store.LockValidatingDirectoryWrapper;
|
||||
import org.apache.lucene.store.MergeInfo;
|
||||
import org.apache.lucene.store.RateLimitedIndexOutput;
|
||||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||
import org.apache.lucene.store.LockValidatingDirectoryWrapper;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -2615,7 +2615,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L);
|
||||
|
||||
info.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
|
||||
trackingDir.getCreatedFiles().clear();
|
||||
trackingDir.clearCreatedFiles();
|
||||
|
||||
setDiagnostics(info, SOURCE_ADDINDEXES_READERS);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.store;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* Base implementation for a concrete {@link Directory} that uses a {@link LockFactory} for locking.
|
||||
|
@ -31,6 +32,22 @@ public abstract class BaseDirectory extends Directory {
|
|||
* this Directory instance). */
|
||||
protected final LockFactory lockFactory;
|
||||
|
||||
/** Shared source of randomness that subclasses can use to generate temp file name candidates. */
protected static final Random tempFileRandom;

static {
  // When the "tests.seed" system property is set, derive our seed from it so that a test
  // failure which relied on temp file names remains reproducible from the test seed;
  // otherwise seed from the current wall-clock time.
  final String testSeed = System.getProperty("tests.seed");
  final int seed = (testSeed != null) ? testSeed.hashCode() : (int) System.currentTimeMillis();
  tempFileRandom = new Random(seed);
}
|
||||
|
||||
/** Sole constructor. */
|
||||
protected BaseDirectory(LockFactory lockFactory) {
|
||||
super();
|
||||
|
|
|
@ -17,9 +17,9 @@ package org.apache.lucene.store;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.Closeable;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.util.Collection; // for javadocs
|
||||
|
||||
|
@ -70,8 +70,12 @@ public abstract class Directory implements Closeable {
|
|||
|
||||
/** Creates a new, empty file in the directory with the given name.
|
||||
Returns a stream writing this file. */
|
||||
public abstract IndexOutput createOutput(String name, IOContext context)
|
||||
throws IOException;
|
||||
public abstract IndexOutput createOutput(String name, IOContext context) throws IOException;
|
||||
|
||||
/** Creates a new, empty file for writing in the directory, with a
|
||||
* temporary file name derived from prefix and suffix. Use
|
||||
* {@link IndexOutput#getName} to see what name was used. */
|
||||
public abstract IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Ensure that any writes to these files are moved to
|
||||
|
@ -120,8 +124,7 @@ public abstract class Directory implements Closeable {
|
|||
|
||||
/** Closes the store. */
|
||||
@Override
|
||||
public abstract void close()
|
||||
throws IOException;
|
||||
public abstract void close() throws IOException;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
@ -19,11 +19,14 @@ package org.apache.lucene.store;
|
|||
|
||||
import java.io.FilterOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.channels.ClosedChannelException; // javadoc @link
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.FileAlreadyExistsException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.OpenOption;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.channels.ClosedChannelException; // javadoc @link
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -220,11 +223,23 @@ public abstract class FSDirectory extends BaseDirectory {
|
|||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
ensureOpen();
|
||||
|
||||
ensureCanWrite(name);
|
||||
return new FSIndexOutput(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
ensureOpen();
|
||||
while (true) {
|
||||
String name = prefix + tempFileRandom.nextInt(Integer.MAX_VALUE) + "." + suffix;
|
||||
try {
|
||||
return new FSIndexOutput(name, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW);
|
||||
} catch (FileAlreadyExistsException faee) {
|
||||
// Retry with next random name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void ensureCanWrite(String name) throws IOException {
|
||||
Files.deleteIfExists(directory.resolve(name)); // delete existing, if any
|
||||
}
|
||||
|
@ -273,7 +288,11 @@ public abstract class FSDirectory extends BaseDirectory {
|
|||
static final int CHUNK_SIZE = 8192;
|
||||
|
||||
public FSIndexOutput(String name) throws IOException {
|
||||
super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", new FilterOutputStream(Files.newOutputStream(directory.resolve(name))) {
|
||||
this(name, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
|
||||
}
|
||||
|
||||
FSIndexOutput(String name, OpenOption... options) throws IOException {
|
||||
super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", name, new FilterOutputStream(Files.newOutputStream(directory.resolve(name), options)) {
|
||||
// This implementation ensures, that we never write more than CHUNK_SIZE bytes:
|
||||
@Override
|
||||
public void write(byte[] b, int offset, int length) throws IOException {
|
||||
|
|
|
@ -22,13 +22,12 @@ import java.nio.file.AtomicMoveNotSupportedException;
|
|||
import java.nio.file.NoSuchFileException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
||||
/**
|
||||
* Expert: A Directory instance that switches files between
|
||||
* two other Directory instances.
|
||||
|
@ -155,6 +154,11 @@ public class FileSwitchDirectory extends Directory {
|
|||
return getDirectory(name).createOutput(name, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
return getDirectory("."+suffix).createTempOutput(prefix, suffix, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) throws IOException {
|
||||
List<String> primaryNames = new ArrayList<>();
|
||||
|
|
|
@ -73,6 +73,11 @@ public class FilterDirectory extends Directory {
|
|||
return in.createOutput(name, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
return in.createTempOutput(prefix, suffix, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) throws IOException {
|
||||
in.sync(names);
|
||||
|
|
|
@ -31,15 +31,27 @@ import java.io.IOException;
|
|||
*/
|
||||
public abstract class IndexOutput extends DataOutput implements Closeable {
|
||||
|
||||
/** Full description of this output, e.g. which class such as {@code FSIndexOutput}, and the full path to the file */
|
||||
private final String resourceDescription;
|
||||
|
||||
/** Just the name part from {@code resourceDescription} */
|
||||
private final String name;
|
||||
|
||||
/** Sole constructor. resourceDescription should be non-null, opaque string
|
||||
* describing this resource; it's returned from {@link #toString}. */
|
||||
protected IndexOutput(String resourceDescription) {
|
||||
protected IndexOutput(String resourceDescription, String name) {
|
||||
if (resourceDescription == null) {
|
||||
throw new IllegalArgumentException("resourceDescription must not be null");
|
||||
}
|
||||
this.resourceDescription = resourceDescription;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/** Returns the name used to create this {@code IndexOutput}. This is especially useful when using
|
||||
* {@link Directory#createTempOutput}. */
|
||||
// TODO: can we somehow use this as the default resource description or something?
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/** Closes this stream to further operations. */
|
||||
|
|
|
@ -37,8 +37,8 @@ public class OutputStreamIndexOutput extends IndexOutput {
|
|||
* @param bufferSize the buffer size in bytes used to buffer writes internally.
|
||||
* @throws IllegalArgumentException if the given buffer size is less or equal to <tt>0</tt>
|
||||
*/
|
||||
public OutputStreamIndexOutput(String resourceDescription, OutputStream out, int bufferSize) {
|
||||
super(resourceDescription);
|
||||
public OutputStreamIndexOutput(String resourceDescription, String name, OutputStream out, int bufferSize) {
|
||||
super(resourceDescription, name);
|
||||
this.os = new BufferedOutputStream(new CheckedOutputStream(out, crc), bufferSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@ package org.apache.lucene.store;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -26,12 +26,12 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
|
||||
|
||||
/**
|
||||
* A memory-resident {@link Directory} implementation. Locking
|
||||
* implementation is by default the {@link SingleInstanceLockFactory}.
|
||||
|
@ -111,10 +111,7 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
|
|||
// and do not synchronize or anything stronger. it's great for testing!
|
||||
// NOTE: fileMap.keySet().toArray(new String[0]) is broken in non Sun JDKs,
|
||||
// and the code below is resilient to map changes during the array population.
|
||||
Set<String> fileNames = fileMap.keySet();
|
||||
List<String> names = new ArrayList<>(fileNames.size());
|
||||
for (String name : fileNames) names.add(name);
|
||||
return names.toArray(new String[names.size()]);
|
||||
return fileMap.keySet().toArray(new String[fileMap.size()]);
|
||||
}
|
||||
|
||||
public final boolean fileNameExists(String name) {
|
||||
|
@ -150,9 +147,6 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
|
|||
return Accountables.namedAccountables("file", fileMap);
|
||||
}
|
||||
|
||||
/** Removes an existing file in the directory.
|
||||
* @throws IOException if the file does not exist
|
||||
*/
|
||||
@Override
|
||||
public void deleteFile(String name) throws IOException {
|
||||
ensureOpen();
|
||||
|
@ -165,7 +159,6 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
ensureOpen();
|
||||
|
@ -179,6 +172,22 @@ public class RAMDirectory extends BaseDirectory implements Accountable {
|
|||
return new RAMOutputStream(name, file, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
ensureOpen();
|
||||
|
||||
// Make the file first...
|
||||
RAMFile file = newRAMFile();
|
||||
|
||||
// ... then try to find a unique name for it:
|
||||
while (true) {
|
||||
String name = prefix + tempFileRandom.nextInt(Integer.MAX_VALUE) + "." + suffix;
|
||||
if (fileMap.putIfAbsent(name, file) == null) {
|
||||
return new RAMOutputStream(name, file, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new {@link RAMFile} for storing data. This method can be
|
||||
* overridden to return different {@link RAMFile} impls, that e.g. override
|
||||
|
|
|
@ -57,7 +57,7 @@ public class RAMOutputStream extends IndexOutput implements Accountable {
|
|||
|
||||
/** Creates this, with specified name. */
|
||||
public RAMOutputStream(String name, RAMFile f, boolean checksum) {
|
||||
super("RAMOutputStream(name=\"" + name + "\")");
|
||||
super("RAMOutputStream(name=\"" + name + "\")", name);
|
||||
file = f;
|
||||
|
||||
// make sure that we switch to the
|
||||
|
|
|
@ -38,7 +38,7 @@ public final class RateLimitedIndexOutput extends IndexOutput {
|
|||
private long currentMinPauseCheckBytes;
|
||||
|
||||
public RateLimitedIndexOutput(final RateLimiter rateLimiter, final IndexOutput delegate) {
|
||||
super("RateLimitedIndexOutput(" + delegate + ")");
|
||||
super("RateLimitedIndexOutput(" + delegate + ")", delegate.getName());
|
||||
this.delegate = delegate;
|
||||
this.rateLimiter = rateLimiter;
|
||||
this.currentMinPauseCheckBytes = rateLimiter.getMinPauseCheckBytes();
|
||||
|
|
|
@ -45,6 +45,14 @@ public final class TrackingDirectoryWrapper extends FilterDirectory {
|
|||
return output;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context)
|
||||
throws IOException {
|
||||
IndexOutput tempOutput = in.createTempOutput(prefix, suffix, context);
|
||||
createdFileNames.add(tempOutput.getName());
|
||||
return tempOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException {
|
||||
in.copyFrom(from, src, dest, context);
|
||||
|
@ -60,10 +68,12 @@ public final class TrackingDirectoryWrapper extends FilterDirectory {
|
|||
}
|
||||
}
|
||||
|
||||
// maybe clone before returning.... all callers are
|
||||
// cloning anyway....
|
||||
/** NOTE: returns a copy of the created files. */
|
||||
public Set<String> getCreatedFiles() {
|
||||
return createdFileNames;
|
||||
return new HashSet<>(createdFileNames);
|
||||
}
|
||||
|
||||
public void clearCreatedFiles() {
|
||||
createdFileNames.clear();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -190,7 +190,7 @@ public final class IOUtils {
|
|||
* <p>
|
||||
* Note that the files should not be null.
|
||||
*/
|
||||
public static void deleteFilesIgnoringExceptions(Directory dir, String... files) {
|
||||
public static void deleteFilesIgnoringExceptions(Directory dir, Collection<String> files) {
|
||||
for (String name : files) {
|
||||
try {
|
||||
dir.deleteFile(name);
|
||||
|
@ -199,6 +199,42 @@ public final class IOUtils {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void deleteFilesIgnoringExceptions(Directory dir, String... files) {
|
||||
deleteFilesIgnoringExceptions(dir, Arrays.asList(files));
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes all given file names. Some of the
|
||||
* file names may be null; they are
|
||||
* ignored. After everything is deleted, the method either
|
||||
* throws the first exception it hit while deleting, or
|
||||
* completes normally if there were no exceptions.
|
||||
*
|
||||
* @param dir Directory to delete files from
|
||||
* @param files file names to delete
|
||||
*/
|
||||
public static void deleteFiles(Directory dir, Collection<String> files) throws IOException {
|
||||
Throwable th = null;
|
||||
for (String name : files) {
|
||||
if (name != null) {
|
||||
try {
|
||||
dir.deleteFile(name);
|
||||
} catch (Throwable t) {
|
||||
addSuppressed(th, t);
|
||||
if (th == null) {
|
||||
th = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reThrow(th);
|
||||
}
|
||||
|
||||
public static void deleteFiles(Directory dir, String... files) throws IOException {
|
||||
deleteFiles(dir, Arrays.asList(files));
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes all given files, suppressing all thrown IOExceptions.
|
||||
|
|
|
@ -17,24 +17,20 @@ package org.apache.lucene.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutput;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||
|
||||
/**
|
||||
* On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
|
||||
* fields:
|
||||
|
@ -43,14 +39,12 @@ import java.util.Locale;
|
|||
* <li>exactly the above count of bytes for the sequence to be sorted.
|
||||
* </ul>
|
||||
*
|
||||
* @see #sort(Path, Path)
|
||||
* @see #sort(String)
|
||||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class OfflineSorter {
|
||||
|
||||
private static Path DEFAULT_TEMP_DIR;
|
||||
|
||||
/** Convenience constant for megabytes */
|
||||
public final static long MB = 1024 * 1024;
|
||||
/** Convenience constant for gigabytes */
|
||||
|
@ -72,6 +66,10 @@ public final class OfflineSorter {
|
|||
*/
|
||||
public final static int MAX_TEMPFILES = 128;
|
||||
|
||||
private final Directory dir;
|
||||
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
/**
|
||||
* A bit more descriptive unit for constructors.
|
||||
*
|
||||
|
@ -142,7 +140,7 @@ public final class OfflineSorter {
|
|||
/** number of partition merges */
|
||||
public int mergeRounds;
|
||||
/** number of lines of data read */
|
||||
public int lines;
|
||||
public int lineCount;
|
||||
/** time spent merging sorted partitions (in milliseconds) */
|
||||
public long mergeTime;
|
||||
/** time spent sorting data (in milliseconds) */
|
||||
|
@ -162,17 +160,16 @@ public final class OfflineSorter {
|
|||
return String.format(Locale.ROOT,
|
||||
"time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB",
|
||||
totalTime / 1000.0d, readTime / 1000.0d, sortTime / 1000.0d, mergeTime / 1000.0d,
|
||||
lines, tempMergeFiles, mergeRounds,
|
||||
lineCount, tempMergeFiles, mergeRounds,
|
||||
(double) bufferSize / MB);
|
||||
}
|
||||
}
|
||||
|
||||
private final BufferSize ramBufferSize;
|
||||
private final Path tempDirectory;
|
||||
|
||||
private final Counter bufferBytesUsed = Counter.newCounter();
|
||||
private final BytesRefArray buffer = new BytesRefArray(bufferBytesUsed);
|
||||
private SortInfo sortInfo;
|
||||
SortInfo sortInfo;
|
||||
private int maxTempFiles;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
|
@ -182,27 +179,25 @@ public final class OfflineSorter {
|
|||
/**
|
||||
* Defaults constructor.
|
||||
*
|
||||
* @see #getDefaultTempDir()
|
||||
* @see BufferSize#automatic()
|
||||
*/
|
||||
public OfflineSorter() throws IOException {
|
||||
this(DEFAULT_COMPARATOR, BufferSize.automatic(), getDefaultTempDir(), MAX_TEMPFILES);
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix) throws IOException {
|
||||
this(dir, tempFileNamePrefix, DEFAULT_COMPARATOR, BufferSize.automatic(), MAX_TEMPFILES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Defaults constructor with a custom comparator.
|
||||
*
|
||||
* @see #getDefaultTempDir()
|
||||
* @see BufferSize#automatic()
|
||||
*/
|
||||
public OfflineSorter(Comparator<BytesRef> comparator) throws IOException {
|
||||
this(comparator, BufferSize.automatic(), getDefaultTempDir(), MAX_TEMPFILES);
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator) throws IOException {
|
||||
this(dir, tempFileNamePrefix, comparator, BufferSize.automatic(), MAX_TEMPFILES);
|
||||
}
|
||||
|
||||
/**
|
||||
* All-details constructor.
|
||||
*/
|
||||
public OfflineSorter(Comparator<BytesRef> comparator, BufferSize ramBufferSize, Path tempDirectory, int maxTempfiles) {
|
||||
public OfflineSorter(Directory dir, String tempFileNamePrefix, Comparator<BytesRef> comparator, BufferSize ramBufferSize, int maxTempfiles) {
|
||||
if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
|
||||
throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
|
||||
}
|
||||
|
@ -212,160 +207,129 @@ public final class OfflineSorter {
|
|||
}
|
||||
|
||||
this.ramBufferSize = ramBufferSize;
|
||||
this.tempDirectory = tempDirectory;
|
||||
this.maxTempFiles = maxTempfiles;
|
||||
this.comparator = comparator;
|
||||
this.dir = dir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
}
|
||||
|
||||
/** Returns the {@link Directory} we use to create temp files. */
|
||||
public Directory getDirectory() {
|
||||
return dir;
|
||||
}
|
||||
|
||||
/** Returns the temp file name prefix passed to {@link Directory#createTempOutput} to generate temporary files. */
|
||||
public String getTempFileNamePrefix() {
|
||||
return tempFileNamePrefix;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort input to output, explicit hint for the buffer size. The amount of allocated
|
||||
* memory may deviate from the hint (may be smaller or larger).
|
||||
* Sort input to a new temp file, returning its name.
|
||||
*/
|
||||
public SortInfo sort(Path input, Path output) throws IOException {
|
||||
public String sort(String inputFileName) throws IOException {
|
||||
|
||||
sortInfo = new SortInfo();
|
||||
sortInfo.totalTime = System.currentTimeMillis();
|
||||
|
||||
// NOTE: don't remove output here: its existence (often created by the caller
|
||||
// up above using Files.createTempFile) prevents another concurrent caller
|
||||
// of this API (from a different thread) from incorrectly re-using this file name
|
||||
List<String> segments = new ArrayList<>();
|
||||
|
||||
ArrayList<Path> merges = new ArrayList<>();
|
||||
boolean success3 = false;
|
||||
try {
|
||||
ByteSequencesReader is = new ByteSequencesReader(input);
|
||||
boolean success = false;
|
||||
try {
|
||||
int lines = 0;
|
||||
while ((lines = readPartition(is)) > 0) {
|
||||
merges.add(sortPartition(lines));
|
||||
sortInfo.tempMergeFiles++;
|
||||
sortInfo.lines += lines;
|
||||
// So we can remove any partially written temp files on exception:
|
||||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
|
||||
|
||||
// Handle intermediate merges.
|
||||
if (merges.size() == maxTempFiles) {
|
||||
Path intermediate = Files.createTempFile(tempDirectory, "sort", "intermediate");
|
||||
boolean success2 = false;
|
||||
try {
|
||||
mergePartitions(merges, intermediate);
|
||||
success2 = true;
|
||||
} finally {
|
||||
if (success2) {
|
||||
IOUtils.deleteFilesIfExist(merges);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(merges);
|
||||
}
|
||||
merges.clear();
|
||||
merges.add(intermediate);
|
||||
}
|
||||
sortInfo.tempMergeFiles++;
|
||||
}
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(is);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(is);
|
||||
boolean success = false;
|
||||
try (ByteSequencesReader is = new ByteSequencesReader(dir.openInput(inputFileName, IOContext.READONCE))) {
|
||||
|
||||
int lineCount;
|
||||
while ((lineCount = readPartition(is)) > 0) {
|
||||
segments.add(sortPartition(trackingDir));
|
||||
sortInfo.tempMergeFiles++;
|
||||
sortInfo.lineCount += lineCount;
|
||||
|
||||
// Handle intermediate merges.
|
||||
if (segments.size() == maxTempFiles) {
|
||||
mergePartitions(trackingDir, segments);
|
||||
}
|
||||
}
|
||||
|
||||
// One partition, try to rename or copy if unsuccessful.
|
||||
if (merges.size() == 1) {
|
||||
Files.move(merges.get(0), output, StandardCopyOption.REPLACE_EXISTING);
|
||||
} else {
|
||||
// otherwise merge the partitions with a priority queue.
|
||||
mergePartitions(merges, output);
|
||||
// Merge the partitions to the output file with a priority queue.
|
||||
if (segments.size() > 1) {
|
||||
mergePartitions(trackingDir, segments);
|
||||
}
|
||||
success3 = true;
|
||||
} finally {
|
||||
if (success3) {
|
||||
IOUtils.deleteFilesIfExist(merges);
|
||||
|
||||
String result;
|
||||
if (segments.isEmpty()) {
|
||||
try (IndexOutput out = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
|
||||
result = out.getName();
|
||||
}
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(merges);
|
||||
IOUtils.deleteFilesIgnoringExceptions(output);
|
||||
result = segments.get(0);
|
||||
}
|
||||
|
||||
// We should be explicitly removing all intermediate files ourselves unless there is an exception:
|
||||
assert trackingDir.getCreatedFiles().size() == 1 && trackingDir.getCreatedFiles().contains(result);
|
||||
|
||||
sortInfo.totalTime = (System.currentTimeMillis() - sortInfo.totalTime);
|
||||
success = true;
|
||||
|
||||
return result;
|
||||
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(trackingDir, trackingDir.getCreatedFiles());
|
||||
}
|
||||
}
|
||||
|
||||
sortInfo.totalTime = (System.currentTimeMillis() - sortInfo.totalTime);
|
||||
return sortInfo;
|
||||
}
|
||||
|
||||
/** Used by test framework */
|
||||
static void setDefaultTempDir(Path tempDir) {
|
||||
DEFAULT_TEMP_DIR = tempDir;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the default temporary directory. By default, java.io.tmpdir. If not accessible
|
||||
* or not available, an IOException is thrown
|
||||
*/
|
||||
public synchronized static Path getDefaultTempDir() throws IOException {
|
||||
if (DEFAULT_TEMP_DIR == null) {
|
||||
// Lazy init
|
||||
String tempDirPath = System.getProperty("java.io.tmpdir");
|
||||
if (tempDirPath == null) {
|
||||
throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
|
||||
}
|
||||
Path tempDirectory = Paths.get(tempDirPath);
|
||||
if (Files.isWritable(tempDirectory) == false) {
|
||||
throw new IOException("Java's temporary folder not present or writeable?: "
|
||||
+ tempDirectory.toAbsolutePath());
|
||||
}
|
||||
DEFAULT_TEMP_DIR = tempDirectory;
|
||||
}
|
||||
|
||||
return DEFAULT_TEMP_DIR;
|
||||
}
|
||||
|
||||
/** Sort a single partition in-memory. */
|
||||
protected Path sortPartition(int len) throws IOException {
|
||||
protected String sortPartition(TrackingDirectoryWrapper trackingDir) throws IOException {
|
||||
BytesRefArray data = this.buffer;
|
||||
Path tempFile = Files.createTempFile(tempDirectory, "sort", "partition");
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
sortInfo.sortTime += (System.currentTimeMillis() - start);
|
||||
|
||||
final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
|
||||
BytesRef spare;
|
||||
try {
|
||||
try (IndexOutput tempFile = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
|
||||
ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
|
||||
BytesRef spare;
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
BytesRefIterator iter = buffer.iterator(comparator);
|
||||
while((spare = iter.next()) != null) {
|
||||
sortInfo.sortTime += (System.currentTimeMillis() - start);
|
||||
|
||||
while ((spare = iter.next()) != null) {
|
||||
assert spare.length <= Short.MAX_VALUE;
|
||||
out.write(spare);
|
||||
}
|
||||
|
||||
out.close();
|
||||
|
||||
// Clean up the buffer for the next partition.
|
||||
data.clear();
|
||||
return tempFile;
|
||||
} finally {
|
||||
IOUtils.close(out);
|
||||
|
||||
return tempFile.getName();
|
||||
}
|
||||
}
|
||||
|
||||
/** Merge a list of sorted temporary files (partitions) into an output file */
|
||||
void mergePartitions(List<Path> merges, Path outputFile) throws IOException {
|
||||
/** Merge a list of sorted temporary files (partitions) into a new temp file created in {@code trackingDir}.
|
||||
* The merged partition files are then deleted and {@code segments} is left holding only the new file's name. */
|
||||
void mergePartitions(Directory trackingDir, List<String> segments) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
ByteSequencesWriter out = new ByteSequencesWriter(outputFile);
|
||||
|
||||
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
|
||||
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(segments.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(FileAndTop a, FileAndTop b) {
|
||||
return comparator.compare(a.current.get(), b.current.get()) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
ByteSequencesReader [] streams = new ByteSequencesReader [merges.size()];
|
||||
try {
|
||||
ByteSequencesReader[] streams = new ByteSequencesReader[segments.size()];
|
||||
|
||||
String newSegmentName = null;
|
||||
|
||||
try (IndexOutput out = trackingDir.createTempOutput(tempFileNamePrefix, "sort", IOContext.DEFAULT)) {
|
||||
newSegmentName = out.getName();
|
||||
ByteSequencesWriter writer = new ByteSequencesWriter(out);
|
||||
|
||||
// Open streams and read the top for each file
|
||||
for (int i = 0; i < merges.size(); i++) {
|
||||
streams[i] = new ByteSequencesReader(merges.get(i));
|
||||
byte line[] = streams[i].read();
|
||||
if (line != null) {
|
||||
queue.insertWithOverflow(new FileAndTop(i, line));
|
||||
}
|
||||
for (int i = 0; i < segments.size(); i++) {
|
||||
streams[i] = new ByteSequencesReader(dir.openInput(segments.get(i), IOContext.READONCE));
|
||||
byte[] line = streams[i].read();
|
||||
assert line != null;
|
||||
queue.insertWithOverflow(new FileAndTop(i, line));
|
||||
}
|
||||
|
||||
// Unix utility sort() uses ordered array of files to pick the next line from, updating
|
||||
|
@ -374,7 +338,7 @@ public final class OfflineSorter {
|
|||
// so it shouldn't make much of a difference (didn't check).
|
||||
FileAndTop top;
|
||||
while ((top = queue.top()) != null) {
|
||||
out.write(top.current.bytes(), 0, top.current.length());
|
||||
writer.write(top.current.bytes(), 0, top.current.length());
|
||||
if (!streams[top.fd].read(top.current)) {
|
||||
queue.pop();
|
||||
} else {
|
||||
|
@ -385,14 +349,15 @@ public final class OfflineSorter {
|
|||
sortInfo.mergeTime += System.currentTimeMillis() - start;
|
||||
sortInfo.mergeRounds++;
|
||||
} finally {
|
||||
// The logic below is: if an exception occurs in closing out, it has a priority over exceptions
|
||||
// happening in closing streams.
|
||||
try {
|
||||
IOUtils.close(streams);
|
||||
} finally {
|
||||
IOUtils.close(out);
|
||||
}
|
||||
IOUtils.close(streams);
|
||||
}
|
||||
|
||||
IOUtils.deleteFiles(trackingDir, segments);
|
||||
|
||||
segments.clear();
|
||||
segments.add(newSegmentName);
|
||||
|
||||
sortInfo.tempMergeFiles++;
|
||||
}
|
||||
|
||||
/** Read in a single partition of data */
|
||||
|
@ -428,18 +393,11 @@ public final class OfflineSorter {
|
|||
* Complementary to {@link ByteSequencesReader}.
|
||||
*/
|
||||
public static class ByteSequencesWriter implements Closeable {
|
||||
private final DataOutput os;
|
||||
|
||||
/** Constructs a ByteSequencesWriter to the provided Path */
|
||||
public ByteSequencesWriter(Path path) throws IOException {
|
||||
this(new DataOutputStream(
|
||||
new BufferedOutputStream(
|
||||
Files.newOutputStream(path))));
|
||||
}
|
||||
private final IndexOutput out;
|
||||
|
||||
/** Constructs a ByteSequencesWriter to the provided DataOutput */
|
||||
public ByteSequencesWriter(DataOutput os) {
|
||||
this.os = os;
|
||||
public ByteSequencesWriter(IndexOutput out) {
|
||||
this.out = out;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -455,7 +413,7 @@ public final class OfflineSorter {
|
|||
* Writes a byte array.
|
||||
* @see #write(byte[], int, int)
|
||||
*/
|
||||
public void write(byte [] bytes) throws IOException {
|
||||
public void write(byte[] bytes) throws IOException {
|
||||
write(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
|
@ -465,25 +423,23 @@ public final class OfflineSorter {
|
|||
* The length is written as a <code>short</code>, followed
|
||||
* by the bytes.
|
||||
*/
|
||||
public void write(byte [] bytes, int off, int len) throws IOException {
|
||||
public void write(byte[] bytes, int off, int len) throws IOException {
|
||||
assert bytes != null;
|
||||
assert off >= 0 && off + len <= bytes.length;
|
||||
assert len >= 0;
|
||||
if (len > Short.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("len must be <= " + Short.MAX_VALUE + "; got " + len);
|
||||
}
|
||||
os.writeShort(len);
|
||||
os.write(bytes, off, len);
|
||||
out.writeShort((short) len);
|
||||
out.writeBytes(bytes, off, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the provided {@link DataOutput} if it is {@link Closeable}.
|
||||
* Closes the provided {@link IndexOutput}.
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (os instanceof Closeable) {
|
||||
((Closeable) os).close();
|
||||
}
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -492,18 +448,11 @@ public final class OfflineSorter {
|
|||
* Complementary to {@link ByteSequencesWriter}.
|
||||
*/
|
||||
public static class ByteSequencesReader implements Closeable {
|
||||
private final DataInput is;
|
||||
private final IndexInput in;
|
||||
|
||||
/** Constructs a ByteSequencesReader from the provided Path */
|
||||
public ByteSequencesReader(Path path) throws IOException {
|
||||
this(new DataInputStream(
|
||||
new BufferedInputStream(
|
||||
Files.newInputStream(path))));
|
||||
}
|
||||
|
||||
/** Constructs a ByteSequencesReader from the provided DataInput */
|
||||
public ByteSequencesReader(DataInput is) {
|
||||
this.is = is;
|
||||
/** Constructs a ByteSequencesReader from the provided IndexInput */
|
||||
public ByteSequencesReader(IndexInput in) {
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -517,14 +466,14 @@ public final class OfflineSorter {
|
|||
public boolean read(BytesRefBuilder ref) throws IOException {
|
||||
short length;
|
||||
try {
|
||||
length = is.readShort();
|
||||
length = in.readShort();
|
||||
} catch (EOFException e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ref.grow(length);
|
||||
ref.setLength(length);
|
||||
is.readFully(ref.bytes(), 0, length);
|
||||
in.readBytes(ref.bytes(), 0, length);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -540,25 +489,23 @@ public final class OfflineSorter {
|
|||
public byte[] read() throws IOException {
|
||||
short length;
|
||||
try {
|
||||
length = is.readShort();
|
||||
length = in.readShort();
|
||||
} catch (EOFException e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
assert length >= 0 : "Sanity: sequence length < 0: " + length;
|
||||
byte [] result = new byte [length];
|
||||
is.readFully(result);
|
||||
byte[] result = new byte[length];
|
||||
in.readBytes(result, 0, length);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the provided {@link DataInput} if it is {@link Closeable}.
|
||||
* Closes the provided {@link IndexInput}.
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (is instanceof Closeable) {
|
||||
((Closeable) is).close();
|
||||
}
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -283,7 +283,7 @@ public class TestCodecUtil extends LuceneTestCase {
|
|||
final IndexOutput output = new RAMOutputStream(file, false);
|
||||
AtomicLong fakeChecksum = new AtomicLong();
|
||||
// wrap the index output where we control the checksum for mocking
|
||||
IndexOutput fakeOutput = new IndexOutput("fake") {
|
||||
IndexOutput fakeOutput = new IndexOutput("fake", "fake") {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
output.close();
|
||||
|
|
|
@ -17,20 +17,22 @@ package org.apache.lucene.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.OfflineSorter.BufferSize;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.OfflineSorter.SortInfo;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
/**
|
||||
* Tests for on-disk merge sorting.
|
||||
|
@ -52,35 +54,53 @@ public class TestOfflineSorter extends LuceneTestCase {
|
|||
super.tearDown();
|
||||
}
|
||||
|
||||
private static Directory newDirectoryNoVirusScanner() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
checkSort(new OfflineSorter(), new byte [][] {});
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
checkSort(dir, new OfflineSorter(dir, "foo"), new byte [][] {});
|
||||
}
|
||||
}
|
||||
|
||||
public void testSingleLine() throws Exception {
|
||||
checkSort(new OfflineSorter(), new byte [][] {
|
||||
"Single line only.".getBytes(StandardCharsets.UTF_8)
|
||||
});
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
checkSort(dir, new OfflineSorter(dir, "foo"), new byte [][] {
|
||||
"Single line only.".getBytes(StandardCharsets.UTF_8)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public void testIntermediateMerges() throws Exception {
|
||||
// Sort 20 mb worth of data with 1mb buffer, binary merging.
|
||||
SortInfo info = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.getDefaultTempDir(), 2),
|
||||
generateRandom((int)OfflineSorter.MB * 20));
|
||||
assertTrue(info.mergeRounds > 10);
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
SortInfo info = checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), 2),
|
||||
generateRandom((int)OfflineSorter.MB * 20));
|
||||
assertTrue(info.mergeRounds > 10);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSmallRandom() throws Exception {
|
||||
// Sort 20 mb worth of data with 1mb buffer.
|
||||
SortInfo sortInfo = checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.getDefaultTempDir(), OfflineSorter.MAX_TEMPFILES),
|
||||
generateRandom((int)OfflineSorter.MB * 20));
|
||||
assertEquals(1, sortInfo.mergeRounds);
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
SortInfo sortInfo = checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(1), OfflineSorter.MAX_TEMPFILES),
|
||||
generateRandom((int)OfflineSorter.MB * 20));
|
||||
assertEquals(1, sortInfo.mergeRounds);
|
||||
}
|
||||
}
|
||||
|
||||
@Nightly
|
||||
public void testLargerRandom() throws Exception {
|
||||
// Sort 100MB worth of data with 16mb buffer.
|
||||
checkSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(16), OfflineSorter.getDefaultTempDir(), OfflineSorter.MAX_TEMPFILES),
|
||||
generateRandom((int)OfflineSorter.MB * 100));
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
checkSort(dir, new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(16), OfflineSorter.MAX_TEMPFILES),
|
||||
generateRandom((int)OfflineSorter.MB * 100));
|
||||
}
|
||||
}
|
||||
|
||||
private byte[][] generateRandom(int howMuchDataInBytes) {
|
||||
|
@ -101,8 +121,9 @@ public class TestOfflineSorter extends LuceneTestCase {
|
|||
final int max = Math.min(left.length, right.length);
|
||||
for (int i = 0, j = 0; i < max; i++, j++) {
|
||||
int diff = (left[i] & 0xff) - (right[j] & 0xff);
|
||||
if (diff != 0)
|
||||
if (diff != 0) {
|
||||
return diff;
|
||||
}
|
||||
}
|
||||
return left.length - right.length;
|
||||
}
|
||||
|
@ -111,54 +132,56 @@ public class TestOfflineSorter extends LuceneTestCase {
|
|||
/**
|
||||
* Check sorting data on an instance of {@link OfflineSorter}.
|
||||
*/
|
||||
private SortInfo checkSort(OfflineSorter sort, byte[][] data) throws IOException {
|
||||
Path unsorted = writeAll("unsorted", data);
|
||||
private SortInfo checkSort(Directory dir, OfflineSorter sorter, byte[][] data) throws IOException {
|
||||
|
||||
IndexOutput unsorted = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
|
||||
writeAll(unsorted, data);
|
||||
|
||||
IndexOutput golden = dir.createTempOutput("golden", "tmp", IOContext.DEFAULT);
|
||||
Arrays.sort(data, unsignedByteOrderComparator);
|
||||
Path golden = writeAll("golden", data);
|
||||
writeAll(golden, data);
|
||||
|
||||
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
|
||||
SortInfo sortInfo;
|
||||
try {
|
||||
sortInfo = sort.sort(unsorted, sorted);
|
||||
//System.out.println("Input size [MB]: " + unsorted.length() / (1024 * 1024));
|
||||
//System.out.println(sortInfo);
|
||||
assertFilesIdentical(golden, sorted);
|
||||
} finally {
|
||||
IOUtils.rm(unsorted, golden, sorted);
|
||||
}
|
||||
String sorted = sorter.sort(unsorted.getName());
|
||||
//System.out.println("Input size [MB]: " + unsorted.length() / (1024 * 1024));
|
||||
//System.out.println(sortInfo);
|
||||
assertFilesIdentical(dir, golden.getName(), sorted);
|
||||
|
||||
return sortInfo;
|
||||
return sorter.sortInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure two files are byte-for-byte identical.
|
||||
*/
|
||||
private void assertFilesIdentical(Path golden, Path sorted) throws IOException {
|
||||
assertEquals(Files.size(golden), Files.size(sorted));
|
||||
private void assertFilesIdentical(Directory dir, String golden, String sorted) throws IOException {
|
||||
long numBytes = dir.fileLength(golden);
|
||||
assertEquals(numBytes, dir.fileLength(sorted));
|
||||
|
||||
byte [] buf1 = new byte [64 * 1024];
|
||||
byte [] buf2 = new byte [64 * 1024];
|
||||
int len;
|
||||
DataInputStream is1 = new DataInputStream(Files.newInputStream(golden));
|
||||
DataInputStream is2 = new DataInputStream(Files.newInputStream(sorted));
|
||||
while ((len = is1.read(buf1)) > 0) {
|
||||
is2.readFully(buf2, 0, len);
|
||||
for (int i = 0; i < len; i++) {
|
||||
assertEquals(buf1[i], buf2[i]);
|
||||
byte[] buf1 = new byte[64 * 1024];
|
||||
byte[] buf2 = new byte[64 * 1024];
|
||||
try (
|
||||
IndexInput in1 = dir.openInput(golden, IOContext.READONCE);
|
||||
IndexInput in2 = dir.openInput(sorted, IOContext.READONCE)
|
||||
) {
|
||||
long left = numBytes;
|
||||
while (left > 0) {
|
||||
int chunk = (int) Math.min(buf1.length, left);
|
||||
left -= chunk;
|
||||
in1.readBytes(buf1, 0, chunk);
|
||||
in2.readBytes(buf2, 0, chunk);
|
||||
for (int i = 0; i < chunk; i++) {
|
||||
assertEquals(buf1[i], buf2[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
IOUtils.close(is1, is2);
|
||||
}
|
||||
|
||||
private Path writeAll(String name, byte[][] data) throws IOException {
|
||||
Path file = Files.createTempFile(tempDir, name, "");
|
||||
ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
|
||||
for (byte [] datum : data) {
|
||||
w.write(datum);
|
||||
/** NOTE: closes the provided {@link IndexOutput} */
|
||||
private void writeAll(IndexOutput out, byte[][] data) throws IOException {
|
||||
try (ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(out)) {
|
||||
for (byte [] datum : data) {
|
||||
w.write(datum);
|
||||
}
|
||||
}
|
||||
w.close();
|
||||
return file;
|
||||
}
|
||||
|
||||
public void testRamBuffer() {
|
||||
|
@ -192,25 +215,27 @@ public class TestOfflineSorter extends LuceneTestCase {
|
|||
Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 10)];
|
||||
final AtomicBoolean failed = new AtomicBoolean();
|
||||
final int iters = atLeast(1000);
|
||||
for(int i=0;i<threads.length;i++) {
|
||||
threads[i] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for(int iter=0;iter<iters && failed.get() == false;iter++) {
|
||||
checkSort(new OfflineSorter(), generateRandom(1024));
|
||||
try (Directory dir = newDirectoryNoVirusScanner()) {
|
||||
for(int i=0;i<threads.length;i++) {
|
||||
final int threadID = i;
|
||||
threads[i] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for(int iter=0;iter<iters && failed.get() == false;iter++) {
|
||||
checkSort(dir, new OfflineSorter(dir, "foo_" + threadID + "_" + iter), generateRandom(1024));
|
||||
}
|
||||
} catch (Throwable th) {
|
||||
failed.set(true);
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
} catch (Throwable th) {
|
||||
failed.set(true);
|
||||
throw new RuntimeException(th);
|
||||
}
|
||||
}
|
||||
};
|
||||
threads[i].start();
|
||||
}
|
||||
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
};
|
||||
threads[i].start();
|
||||
}
|
||||
for(Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
assertFalse(failed.get());
|
||||
|
|
|
@ -143,7 +143,7 @@ public class SlowRAMDirectory extends RAMDirectory {
|
|||
private final Random rand;
|
||||
|
||||
public SlowIndexOutput(IndexOutput io) {
|
||||
super("SlowIndexOutput(" + io + ")");
|
||||
super("SlowIndexOutput(" + io + ")", io.getName());
|
||||
this.io = io;
|
||||
this.rand = forkRandom();
|
||||
}
|
||||
|
|
|
@ -18,10 +18,10 @@ package org.apache.lucene.store;
|
|||
*/
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileDescriptor;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Path;
|
||||
|
@ -149,7 +149,7 @@ public class NativeUnixDirectory extends FSDirectory {
|
|||
return delegate.createOutput(name, context);
|
||||
} else {
|
||||
ensureCanWrite(name);
|
||||
return new NativeUnixIndexOutput(getDirectory().resolve(name), mergeBufferSize);
|
||||
return new NativeUnixIndexOutput(getDirectory().resolve(name), name, mergeBufferSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,8 +167,8 @@ public class NativeUnixDirectory extends FSDirectory {
|
|||
private long fileLength;
|
||||
private boolean isOpen;
|
||||
|
||||
public NativeUnixIndexOutput(Path path, int bufferSize) throws IOException {
|
||||
super("NativeUnixIndexOutput(path=\"" + path.toString() + "\")");
|
||||
public NativeUnixIndexOutput(Path path, String name, int bufferSize) throws IOException {
|
||||
super("NativeUnixIndexOutput(path=\"" + path.toString() + "\")", name);
|
||||
//this.path = path;
|
||||
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
|
||||
fos = new FileOutputStream(fd);
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -39,9 +40,13 @@ class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
|
|||
final IndexOutput out;
|
||||
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
|
||||
final SegmentWriteState state;
|
||||
final Directory tempDir;
|
||||
final String tempFileNamePrefix;
|
||||
|
||||
public BKDTreeDocValuesConsumer(DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
public BKDTreeDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
BKDTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.delegate = delegate;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||
|
@ -91,7 +96,7 @@ class BKDTreeDocValuesConsumer extends DocValuesConsumer implements Closeable {
|
|||
@Override
|
||||
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
|
||||
delegate.addSortedNumericField(field, docToValueCount, values);
|
||||
BKDTreeWriter writer = new BKDTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
BKDTreeWriter writer = new BKDTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
Iterator<Number> valueIt = values.iterator();
|
||||
Iterator<Number> valueCountIt = docToValueCount.iterator();
|
||||
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
|
||||
|
|
|
@ -99,7 +99,7 @@ public class BKDTreeDocValuesFormat extends DocValuesFormat {
|
|||
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
|
||||
return new BKDTreeDocValuesConsumer(delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
return new BKDTreeDocValuesConsumer(state.directory, state.segmentInfo.name, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,14 +18,13 @@ package org.apache.lucene.bkdtree;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -33,8 +32,8 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
// TODO
|
||||
|
@ -88,22 +87,27 @@ class BKDTreeWriter {
|
|||
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
|
||||
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter writer;
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter offlineWriter;
|
||||
private GrowingHeapLatLonWriter heapWriter;
|
||||
|
||||
private Path tempInput;
|
||||
private IndexOutput tempInput;
|
||||
private final int maxPointsInLeafNode;
|
||||
private final int maxPointsSortInHeap;
|
||||
|
||||
private long pointCount;
|
||||
|
||||
public BKDTreeWriter() throws IOException {
|
||||
this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
|
||||
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
|
||||
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
|
||||
public BKDTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
public BKDTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||
|
||||
|
@ -143,8 +147,8 @@ class BKDTreeWriter {
|
|||
private void switchToOffline() throws IOException {
|
||||
|
||||
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
|
||||
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
|
||||
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
|
||||
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
for(int i=0;i<pointCount;i++) {
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
scratchBytesOutput.writeInt(heapWriter.latEncs[i]);
|
||||
|
@ -152,7 +156,7 @@ class BKDTreeWriter {
|
|||
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
|
||||
scratchBytesOutput.writeVLong(i);
|
||||
// TODO: can/should OfflineSorter optimize the fixed-width case?
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
}
|
||||
|
||||
heapWriter = null;
|
||||
|
@ -165,7 +169,7 @@ class BKDTreeWriter {
|
|||
assert lonEnc < Integer.MAX_VALUE;
|
||||
|
||||
if (pointCount >= maxPointsSortInHeap) {
|
||||
if (writer == null) {
|
||||
if (offlineWriter == null) {
|
||||
switchToOffline();
|
||||
}
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
|
@ -173,7 +177,7 @@ class BKDTreeWriter {
|
|||
scratchBytesOutput.writeInt(lonEnc);
|
||||
scratchBytesOutput.writeVInt(docID);
|
||||
scratchBytesOutput.writeVLong(pointCount);
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
} else {
|
||||
// Not too many points added yet, continue using heap:
|
||||
heapWriter.append(latEnc, lonEnc, pointCount, docID);
|
||||
|
@ -184,7 +188,7 @@ class BKDTreeWriter {
|
|||
|
||||
/** Changes incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
|
||||
* as we recurse in {@link #build}. */
|
||||
private LatLonWriter convertToFixedWidth(Path in) throws IOException {
|
||||
private LatLonWriter convertToFixedWidth(String in) throws IOException {
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
scratch.grow(BYTES_PER_DOC);
|
||||
BytesRef bytes = scratch.get();
|
||||
|
@ -194,7 +198,7 @@ class BKDTreeWriter {
|
|||
LatLonWriter sortedWriter = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
reader = new OfflineSorter.ByteSequencesReader(in);
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
|
||||
sortedWriter = getWriter(pointCount);
|
||||
for (long i=0;i<pointCount;i++) {
|
||||
boolean result = reader.read(scratch);
|
||||
|
@ -230,10 +234,10 @@ class BKDTreeWriter {
|
|||
|
||||
private LatLonWriter sort(boolean lon) throws IOException {
|
||||
if (heapWriter != null) {
|
||||
// All buffered points are still in heap
|
||||
|
||||
assert pointCount < Integer.MAX_VALUE;
|
||||
|
||||
// All buffered points are still in heap
|
||||
new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
|
@ -329,20 +333,19 @@ class BKDTreeWriter {
|
|||
}
|
||||
};
|
||||
|
||||
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
|
||||
boolean success = false;
|
||||
|
||||
boolean success = false;
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
|
||||
String sortedFileName = sorter.sort(tempInput.getName());
|
||||
try {
|
||||
OfflineSorter latSorter = new OfflineSorter(cmp);
|
||||
latSorter.sort(tempInput, sorted);
|
||||
LatLonWriter writer = convertToFixedWidth(sorted);
|
||||
LatLonWriter writer = convertToFixedWidth(sortedFileName);
|
||||
success = true;
|
||||
return writer;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.rm(sorted);
|
||||
tempDir.deleteFile(sortedFileName);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorted);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -352,8 +355,9 @@ class BKDTreeWriter {
|
|||
public long finish(IndexOutput out) throws IOException {
|
||||
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
|
||||
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
if (offlineWriter != null) {
|
||||
// This also closes the temp file output:
|
||||
offlineWriter.close();
|
||||
}
|
||||
|
||||
LongBitSet bitSet = new LongBitSet(pointCount);
|
||||
|
@ -410,7 +414,9 @@ class BKDTreeWriter {
|
|||
if (success) {
|
||||
latSortedWriter.destroy();
|
||||
lonSortedWriter.destroy();
|
||||
IOUtils.rm(tempInput);
|
||||
if (tempInput != null) {
|
||||
tempDir.deleteFile(tempInput.getName());
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
latSortedWriter.destroy();
|
||||
|
@ -422,7 +428,9 @@ class BKDTreeWriter {
|
|||
} catch (Throwable t) {
|
||||
// Suppress to keep throwing original exc
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput);
|
||||
if (tempInput != null) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -813,7 +821,7 @@ class BKDTreeWriter {
|
|||
if (count < maxPointsSortInHeap) {
|
||||
return new HeapLatLonWriter((int) count);
|
||||
} else {
|
||||
return new OfflineLatLonWriter(count);
|
||||
return new OfflineLatLonWriter(tempDir, tempFileNamePrefix, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,34 +17,23 @@ package org.apache.lucene.bkdtree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
final class OfflineLatLonReader implements LatLonReader {
|
||||
final InputStreamDataInput in;
|
||||
final IndexInput in;
|
||||
long countLeft;
|
||||
private int latEnc;
|
||||
private int lonEnc;
|
||||
private long ord;
|
||||
private int docID;
|
||||
|
||||
OfflineLatLonReader(Path tempFile, long start, long count) throws IOException {
|
||||
InputStream fis = Files.newInputStream(tempFile);
|
||||
long seekFP = start * BKDTreeWriter.BYTES_PER_DOC;
|
||||
long skipped = 0;
|
||||
while (skipped < seekFP) {
|
||||
long inc = fis.skip(seekFP - skipped);
|
||||
skipped += inc;
|
||||
if (inc == 0) {
|
||||
throw new RuntimeException("skip returned 0");
|
||||
}
|
||||
}
|
||||
in = new InputStreamDataInput(new BufferedInputStream(fis));
|
||||
OfflineLatLonReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
|
||||
in = tempDir.openInput(tempFileName, IOContext.READONCE);
|
||||
in.seek(start * BKDTreeWriter.BYTES_PER_DOC);
|
||||
this.countLeft = count;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,29 +17,26 @@ package org.apache.lucene.bkdtree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
final class OfflineLatLonWriter implements LatLonWriter {
|
||||
|
||||
final Path tempFile;
|
||||
final Directory tempDir;
|
||||
final byte[] scratchBytes = new byte[BKDTreeWriter.BYTES_PER_DOC];
|
||||
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
final OutputStreamDataOutput out;
|
||||
final IndexOutput out;
|
||||
final long count;
|
||||
private long countWritten;
|
||||
private boolean closed;
|
||||
|
||||
public OfflineLatLonWriter(long count) throws IOException {
|
||||
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
|
||||
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
|
||||
public OfflineLatLonWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
|
||||
this.tempDir = tempDir;
|
||||
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd", IOContext.DEFAULT);
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
|
@ -55,7 +52,7 @@ final class OfflineLatLonWriter implements LatLonWriter {
|
|||
@Override
|
||||
public LatLonReader getReader(long start) throws IOException {
|
||||
assert closed;
|
||||
return new OfflineLatLonReader(tempFile, start, count-start);
|
||||
return new OfflineLatLonReader(tempDir, out.getName(), start, count-start);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -69,12 +66,12 @@ final class OfflineLatLonWriter implements LatLonWriter {
|
|||
|
||||
@Override
|
||||
public void destroy() throws IOException {
|
||||
IOUtils.rm(tempFile);
|
||||
tempDir.deleteFile(out.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OfflineLatLonWriter(count=" + count + " tempFile=" + tempFile + ")";
|
||||
return "OfflineLatLonWriter(count=" + count + " tempFileName=" + out.getName() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,33 +17,22 @@ package org.apache.lucene.rangetree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
final class OfflineSliceReader implements SliceReader {
|
||||
final InputStreamDataInput in;
|
||||
long countLeft;
|
||||
final IndexInput in;
|
||||
private long countLeft;
|
||||
private long value;
|
||||
private long ord;
|
||||
private int docID;
|
||||
|
||||
OfflineSliceReader(Path tempFile, long start, long count) throws IOException {
|
||||
InputStream fis = Files.newInputStream(tempFile);
|
||||
long seekFP = start * RangeTreeWriter.BYTES_PER_DOC;
|
||||
long skipped = 0;
|
||||
while (skipped < seekFP) {
|
||||
long inc = fis.skip(seekFP - skipped);
|
||||
skipped += inc;
|
||||
if (inc == 0) {
|
||||
throw new RuntimeException("skip returned 0");
|
||||
}
|
||||
}
|
||||
in = new InputStreamDataInput(new BufferedInputStream(fis));
|
||||
OfflineSliceReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
|
||||
in = tempDir.openInput(tempFileName, IOContext.READONCE);
|
||||
in.seek(start * RangeTreeWriter.BYTES_PER_DOC);
|
||||
this.countLeft = count;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,50 +17,47 @@ package org.apache.lucene.rangetree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
final class OfflineSliceWriter implements SliceWriter {
|
||||
|
||||
final Path tempFile;
|
||||
final Directory tempDir;
|
||||
final byte[] scratchBytes = new byte[RangeTreeWriter.BYTES_PER_DOC];
|
||||
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
final OutputStreamDataOutput out;
|
||||
final IndexOutput tempFile;
|
||||
final long count;
|
||||
private boolean closed;
|
||||
private long countWritten;
|
||||
|
||||
public OfflineSliceWriter(long count) throws IOException {
|
||||
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
|
||||
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
|
||||
public OfflineSliceWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
|
||||
this.tempDir = tempDir;
|
||||
tempFile = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(long value, long ord, int docID) throws IOException {
|
||||
out.writeLong(value);
|
||||
out.writeLong(ord);
|
||||
out.writeInt(docID);
|
||||
tempFile.writeLong(value);
|
||||
tempFile.writeLong(ord);
|
||||
tempFile.writeInt(docID);
|
||||
countWritten++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SliceReader getReader(long start) throws IOException {
|
||||
assert closed;
|
||||
return new OfflineSliceReader(tempFile, start, count-start);
|
||||
return new OfflineSliceReader(tempDir, tempFile.getName(), start, count-start);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
closed = true;
|
||||
out.close();
|
||||
tempFile.close();
|
||||
if (count != countWritten) {
|
||||
throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
|
||||
}
|
||||
|
@ -68,12 +65,12 @@ final class OfflineSliceWriter implements SliceWriter {
|
|||
|
||||
@Override
|
||||
public void destroy() throws IOException {
|
||||
IOUtils.rm(tempFile);
|
||||
tempDir.deleteFile(tempFile.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OfflineSliceWriter(count=" + count + " tempFile=" + tempFile + ")";
|
||||
return "OfflineSliceWriter(count=" + count + " tempFileName=" + tempFile.getName() + ")";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable
|
|||
@Override
|
||||
public void addSortedNumericField(FieldInfo field, Iterable<Number> docToValueCount, Iterable<Number> values) throws IOException {
|
||||
delegate.addSortedNumericField(field, docToValueCount, values);
|
||||
RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
Iterator<Number> valueIt = values.iterator();
|
||||
Iterator<Number> valueCountIt = docToValueCount.iterator();
|
||||
//System.out.println("\nSNF: field=" + field.name);
|
||||
|
@ -127,7 +127,7 @@ class RangeTreeDocValuesConsumer extends DocValuesConsumer implements Closeable
|
|||
@Override
|
||||
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
|
||||
delegate.addSortedSetField(field, values, docToOrdCount, ords);
|
||||
RangeTreeWriter writer = new RangeTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
RangeTreeWriter writer = new RangeTreeWriter(state.directory, state.segmentInfo.name, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
Iterator<Number> docToOrdCountIt = docToOrdCount.iterator();
|
||||
Iterator<Number> ordsIt = ords.iterator();
|
||||
//System.out.println("\nSSF: field=" + field.name);
|
||||
|
|
|
@ -18,22 +18,21 @@ package org.apache.lucene.rangetree;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
// TODO
|
||||
|
@ -77,10 +76,13 @@ class RangeTreeWriter {
|
|||
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
|
||||
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter writer;
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter offlineWriter;
|
||||
private GrowingHeapSliceWriter heapWriter;
|
||||
|
||||
private Path tempInput;
|
||||
private IndexOutput tempInput;
|
||||
private final int maxValuesInLeafNode;
|
||||
private final int maxValuesSortInHeap;
|
||||
|
||||
|
@ -88,13 +90,15 @@ class RangeTreeWriter {
|
|||
private long globalMinValue = Long.MAX_VALUE;
|
||||
private long globalMaxValue = Long.MIN_VALUE;
|
||||
|
||||
public RangeTreeWriter() throws IOException {
|
||||
this(DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
|
||||
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
|
||||
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_VALUES_IN_LEAF_NODE, DEFAULT_MAX_VALUES_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
// TODO: instead of maxValuesSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
|
||||
public RangeTreeWriter(int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
|
||||
public RangeTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxValuesInLeafNode, int maxValuesSortInHeap) throws IOException {
|
||||
verifyParams(maxValuesInLeafNode, maxValuesSortInHeap);
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.maxValuesInLeafNode = maxValuesInLeafNode;
|
||||
this.maxValuesSortInHeap = maxValuesSortInHeap;
|
||||
|
||||
|
@ -121,15 +125,15 @@ class RangeTreeWriter {
|
|||
private void switchToOffline() throws IOException {
|
||||
|
||||
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
|
||||
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
|
||||
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "rangetree", IOContext.DEFAULT);
|
||||
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
for(int i=0;i<valueCount;i++) {
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
scratchBytesOutput.writeLong(heapWriter.values[i]);
|
||||
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
|
||||
scratchBytesOutput.writeVLong(i);
|
||||
// TODO: can/should OfflineSorter optimize the fixed-width case?
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
}
|
||||
|
||||
heapWriter = null;
|
||||
|
@ -137,14 +141,14 @@ class RangeTreeWriter {
|
|||
|
||||
void add(long value, int docID) throws IOException {
|
||||
if (valueCount >= maxValuesSortInHeap) {
|
||||
if (writer == null) {
|
||||
if (offlineWriter == null) {
|
||||
switchToOffline();
|
||||
}
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
scratchBytesOutput.writeLong(value);
|
||||
scratchBytesOutput.writeVInt(docID);
|
||||
scratchBytesOutput.writeVLong(valueCount);
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
} else {
|
||||
// Not too many points added yet, continue using heap:
|
||||
heapWriter.append(value, valueCount, docID);
|
||||
|
@ -157,7 +161,7 @@ class RangeTreeWriter {
|
|||
|
||||
/** Changes incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
|
||||
* as we recurse in {@link #build}. */
|
||||
private SliceWriter convertToFixedWidth(Path in) throws IOException {
|
||||
private SliceWriter convertToFixedWidth(String in) throws IOException {
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
scratch.grow(BYTES_PER_DOC);
|
||||
BytesRef bytes = scratch.get();
|
||||
|
@ -167,7 +171,7 @@ class RangeTreeWriter {
|
|||
SliceWriter sortedWriter = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
reader = new OfflineSorter.ByteSequencesReader(in);
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
|
||||
sortedWriter = getWriter(valueCount);
|
||||
for (long i=0;i<valueCount;i++) {
|
||||
boolean result = reader.read(scratch);
|
||||
|
@ -280,19 +284,18 @@ class RangeTreeWriter {
|
|||
}
|
||||
};
|
||||
|
||||
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
|
||||
boolean success = false;
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
|
||||
String sortedFileName = sorter.sort(tempInput.getName());
|
||||
try {
|
||||
OfflineSorter sorter = new OfflineSorter(cmp);
|
||||
sorter.sort(tempInput, sorted);
|
||||
SliceWriter writer = convertToFixedWidth(sorted);
|
||||
SliceWriter writer = convertToFixedWidth(sortedFileName);
|
||||
success = true;
|
||||
return writer;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.rm(sorted);
|
||||
tempDir.deleteFile(sortedFileName);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorted);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -301,8 +304,8 @@ class RangeTreeWriter {
|
|||
/** Writes the 1d BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
|
||||
public long finish(IndexOutput out) throws IOException {
|
||||
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
if (offlineWriter != null) {
|
||||
offlineWriter.close();
|
||||
}
|
||||
|
||||
if (valueCount == 0) {
|
||||
|
@ -357,14 +360,18 @@ class RangeTreeWriter {
|
|||
} finally {
|
||||
if (success) {
|
||||
sortedWriter.destroy();
|
||||
IOUtils.rm(tempInput);
|
||||
if (tempInput != null) {
|
||||
tempDir.deleteFile(tempInput.getName());
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
sortedWriter.destroy();
|
||||
} catch (Throwable t) {
|
||||
// Suppress to keep throwing original exc
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput);
|
||||
if (tempInput != null) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -567,7 +574,7 @@ class RangeTreeWriter {
|
|||
if (count < maxValuesSortInHeap) {
|
||||
return new HeapSliceWriter((int) count);
|
||||
} else {
|
||||
return new OfflineSliceWriter(count);
|
||||
return new OfflineSliceWriter(tempDir, tempFileNamePrefix, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,15 @@ package org.apache.lucene.bkdtree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
|
||||
|
@ -37,24 +46,16 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.SimpleCollector;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase.Nightly;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.Nightly;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class TestBKDTree extends LuceneTestCase {
|
||||
|
||||
private static boolean smallBBox;
|
||||
|
@ -163,7 +164,7 @@ public class TestBKDTree extends LuceneTestCase {
|
|||
// Every doc has 2 points:
|
||||
double[] lats = new double[2*numPoints];
|
||||
double[] lons = new double[2*numPoints];
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
// We rely on docID order:
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
|
@ -378,7 +379,7 @@ public class TestBKDTree extends LuceneTestCase {
|
|||
if (lats.length > 100000) {
|
||||
dir = newFSDirectory(createTempDir("TestBKDTree"));
|
||||
} else {
|
||||
dir = newDirectory();
|
||||
dir = getDirectory();
|
||||
}
|
||||
Set<Integer> deleted = new HashSet<>();
|
||||
// RandomIndexWriter is too slow here:
|
||||
|
@ -608,7 +609,7 @@ public class TestBKDTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testAccountableHasDelegate() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(getDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -632,4 +633,12 @@ public class TestBKDTree extends LuceneTestCase {
|
|||
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
|
||||
return new BKDTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
|
||||
}
|
||||
|
||||
private static Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,14 @@ package org.apache.lucene.rangetree;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
|
||||
|
@ -39,6 +47,7 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.SimpleCollector;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -49,14 +58,6 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class TestRangeTree extends LuceneTestCase {
|
||||
|
||||
// Controls what range of values we randomly generate, so we sometimes test narrow ranges:
|
||||
|
@ -116,7 +117,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
int numValues = atLeast(10000);
|
||||
// Every doc has 2 values:
|
||||
long[] values = new long[2*numValues];
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
|
||||
// We rely on docID order:
|
||||
|
@ -201,7 +202,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
int numValues = atLeast(10000);
|
||||
// Every doc has 2 values:
|
||||
long[] values = new long[2*numValues];
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
|
||||
// We rely on docID order:
|
||||
|
@ -370,7 +371,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
if (values.length > 100000) {
|
||||
dir = newFSDirectory(createTempDir("TestRangeTree"));
|
||||
} else {
|
||||
dir = newDirectory();
|
||||
dir = getDirectory();
|
||||
}
|
||||
Set<Integer> deleted = new HashSet<>();
|
||||
// RandomIndexWriter is too slow here:
|
||||
|
@ -534,7 +535,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testAccountableHasDelegate() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -554,7 +555,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testMinMaxLong() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -580,7 +581,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBasicSortedSet() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -613,7 +614,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testLongMinMaxNumeric() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -641,7 +642,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testLongMinMaxSortedSet() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -669,7 +670,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSortedSetNoOrdsMatch() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -693,7 +694,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testNumericNoValuesMatch() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -715,7 +716,7 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testNoDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
|
||||
iwc.setCodec(codec);
|
||||
|
@ -766,4 +767,12 @@ public class TestRangeTree extends LuceneTestCase {
|
|||
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
|
||||
return new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
|
||||
}
|
||||
|
||||
private static Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,14 +18,13 @@ package org.apache.lucene.bkdtree3d;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -33,8 +32,8 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
// TODO
|
||||
|
@ -83,10 +82,13 @@ class BKD3DTreeWriter {
|
|||
private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
|
||||
private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter writer;
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
private OfflineSorter.ByteSequencesWriter offlineWriter;
|
||||
private GrowingHeapWriter heapWriter;
|
||||
|
||||
private Path tempInput;
|
||||
private IndexOutput tempInput;
|
||||
private final int maxPointsInLeafNode;
|
||||
private final int maxPointsSortInHeap;
|
||||
|
||||
|
@ -94,13 +96,15 @@ class BKD3DTreeWriter {
|
|||
|
||||
private final int[] scratchDocIDs;
|
||||
|
||||
public BKD3DTreeWriter() throws IOException {
|
||||
this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
|
||||
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix) throws IOException {
|
||||
this(tempDir, tempFileNamePrefix, DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
|
||||
}
|
||||
|
||||
// TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
|
||||
public BKD3DTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
public BKD3DTreeWriter(Directory tempDir, String tempFileNamePrefix, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||
scratchDocIDs = new int[maxPointsInLeafNode];
|
||||
|
@ -128,8 +132,8 @@ class BKD3DTreeWriter {
|
|||
private void switchToOffline() throws IOException {
|
||||
|
||||
// For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
|
||||
tempInput = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "in", "");
|
||||
writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
|
||||
offlineWriter = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
for(int i=0;i<pointCount;i++) {
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
scratchBytesOutput.writeInt(heapWriter.xs[i]);
|
||||
|
@ -138,7 +142,7 @@ class BKD3DTreeWriter {
|
|||
scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
|
||||
scratchBytesOutput.writeVLong(i);
|
||||
// TODO: can/should OfflineSorter optimize the fixed-width case?
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
}
|
||||
|
||||
heapWriter = null;
|
||||
|
@ -147,7 +151,7 @@ class BKD3DTreeWriter {
|
|||
public void add(int x, int y, int z, int docID) throws IOException {
|
||||
|
||||
if (pointCount >= maxPointsSortInHeap) {
|
||||
if (writer == null) {
|
||||
if (offlineWriter == null) {
|
||||
switchToOffline();
|
||||
}
|
||||
scratchBytesOutput.reset(scratchBytes);
|
||||
|
@ -156,7 +160,7 @@ class BKD3DTreeWriter {
|
|||
scratchBytesOutput.writeInt(z);
|
||||
scratchBytesOutput.writeVInt(docID);
|
||||
scratchBytesOutput.writeVLong(pointCount);
|
||||
writer.write(scratchBytes, 0, scratchBytes.length);
|
||||
offlineWriter.write(scratchBytes, 0, scratchBytes.length);
|
||||
} else {
|
||||
// Not too many points added yet, continue using heap:
|
||||
heapWriter.append(x, y, z, pointCount, docID);
|
||||
|
@ -167,7 +171,7 @@ class BKD3DTreeWriter {
|
|||
|
||||
/** Changes incoming {@link ByteSequencesWriter} file to a fixed-width-per-entry file, because we need to be able to slice
|
||||
* as we recurse in {@link #build}. */
|
||||
private Writer convertToFixedWidth(Path in) throws IOException {
|
||||
private Writer convertToFixedWidth(String in) throws IOException {
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
scratch.grow(BYTES_PER_DOC);
|
||||
BytesRef bytes = scratch.get();
|
||||
|
@ -177,7 +181,7 @@ class BKD3DTreeWriter {
|
|||
Writer sortedWriter = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
reader = new OfflineSorter.ByteSequencesReader(in);
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(in, IOContext.READONCE));
|
||||
sortedWriter = getWriter(pointCount);
|
||||
for (long i=0;i<pointCount;i++) {
|
||||
boolean result = reader.read(scratch);
|
||||
|
@ -328,19 +332,18 @@ class BKD3DTreeWriter {
|
|||
}
|
||||
};
|
||||
|
||||
Path sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "sorted", "");
|
||||
boolean success = false;
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, cmp);
|
||||
String sortedFileName = sorter.sort(tempInput.getName());
|
||||
try {
|
||||
OfflineSorter sorter = new OfflineSorter(cmp);
|
||||
sorter.sort(tempInput, sorted);
|
||||
Writer writer = convertToFixedWidth(sorted);
|
||||
Writer writer = convertToFixedWidth(sortedFileName);
|
||||
success = true;
|
||||
return writer;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.rm(sorted);
|
||||
tempDir.deleteFile(sortedFileName);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorted);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -350,8 +353,8 @@ class BKD3DTreeWriter {
|
|||
public long finish(IndexOutput out) throws IOException {
|
||||
//System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter + " maxPointsInLeafNode=" + maxPointsInLeafNode);
|
||||
|
||||
if (writer != null) {
|
||||
writer.close();
|
||||
if (offlineWriter != null) {
|
||||
offlineWriter.close();
|
||||
}
|
||||
|
||||
LongBitSet bitSet = new LongBitSet(pointCount);
|
||||
|
@ -413,7 +416,9 @@ class BKD3DTreeWriter {
|
|||
xSortedWriter.destroy();
|
||||
ySortedWriter.destroy();
|
||||
zSortedWriter.destroy();
|
||||
IOUtils.rm(tempInput);
|
||||
if (tempInput != null) {
|
||||
tempDir.deleteFile(tempInput.getName());
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
xSortedWriter.destroy();
|
||||
|
@ -430,7 +435,9 @@ class BKD3DTreeWriter {
|
|||
} catch (Throwable t) {
|
||||
// Suppress to keep throwing original exc
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput);
|
||||
if (tempInput != null) {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -911,7 +918,7 @@ class BKD3DTreeWriter {
|
|||
if (count < maxPointsSortInHeap) {
|
||||
return new HeapWriter((int) count);
|
||||
} else {
|
||||
return new OfflineWriter(count);
|
||||
return new OfflineWriter(tempDir, tempFileNamePrefix, count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,22 +17,23 @@ package org.apache.lucene.bkdtree3d;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||
final DocValuesConsumer delegate;
|
||||
final int maxPointsInLeafNode;
|
||||
|
@ -40,9 +41,14 @@ class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
|
|||
final IndexOutput out;
|
||||
final Map<Integer,Long> fieldIndexFPs = new HashMap<>();
|
||||
final SegmentWriteState state;
|
||||
final Directory tempDir;
|
||||
final String tempFileNamePrefix;
|
||||
|
||||
public Geo3DDocValuesConsumer(PlanetModel planetModel, DocValuesConsumer delegate, SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
public Geo3DDocValuesConsumer(Directory tempDir, String tempFileNamePrefix, PlanetModel planetModel, DocValuesConsumer delegate,
|
||||
SegmentWriteState state, int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
|
||||
BKD3DTreeWriter.verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.delegate = delegate;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||
|
@ -106,7 +112,7 @@ class Geo3DDocValuesConsumer extends DocValuesConsumer implements Closeable {
|
|||
@Override
|
||||
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
|
||||
delegate.addBinaryField(field, values);
|
||||
BKD3DTreeWriter writer = new BKD3DTreeWriter(maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
BKD3DTreeWriter writer = new BKD3DTreeWriter(tempDir, tempFileNamePrefix, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
Iterator<BytesRef> valuesIt = values.iterator();
|
||||
for (int docID=0;docID<state.segmentInfo.maxDoc();docID++) {
|
||||
assert valuesIt.hasNext();
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
|
|||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
|
||||
import org.apache.lucene.geo3d.PlanetModel;
|
||||
import org.apache.lucene.geo3d.Vector;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
|
@ -106,7 +105,7 @@ public class Geo3DDocValuesFormat extends DocValuesFormat {
|
|||
|
||||
@Override
|
||||
public DocValuesConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
|
||||
return new Geo3DDocValuesConsumer(planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
return new Geo3DDocValuesConsumer(state.directory, state.segmentInfo.name, planetModel, delegate.fieldsConsumer(state), state, maxPointsInLeafNode, maxPointsSortInHeap);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,16 +17,14 @@ package org.apache.lucene.bkdtree3d;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
final class OfflineReader implements Reader {
|
||||
final InputStreamDataInput in;
|
||||
final IndexInput in;
|
||||
long countLeft;
|
||||
private int x;
|
||||
private int y;
|
||||
|
@ -34,18 +32,9 @@ final class OfflineReader implements Reader {
|
|||
private long ord;
|
||||
private int docID;
|
||||
|
||||
OfflineReader(Path tempFile, long start, long count) throws IOException {
|
||||
InputStream fis = Files.newInputStream(tempFile);
|
||||
long seekFP = start * BKD3DTreeWriter.BYTES_PER_DOC;
|
||||
long skipped = 0;
|
||||
while (skipped < seekFP) {
|
||||
long inc = fis.skip(seekFP - skipped);
|
||||
skipped += inc;
|
||||
if (inc == 0) {
|
||||
throw new RuntimeException("skip returned 0");
|
||||
}
|
||||
}
|
||||
in = new InputStreamDataInput(new BufferedInputStream(fis));
|
||||
OfflineReader(Directory tempDir, String tempFileName, long start, long count) throws IOException {
|
||||
in = tempDir.openInput(tempFileName, IOContext.READONCE);
|
||||
in.seek(start * BKD3DTreeWriter.BYTES_PER_DOC);
|
||||
this.countLeft = count;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,29 +17,26 @@ package org.apache.lucene.bkdtree3d;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
final class OfflineWriter implements Writer {
|
||||
|
||||
final Path tempFile;
|
||||
final Directory tempDir;
|
||||
final IndexOutput out;
|
||||
final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
|
||||
final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
|
||||
final OutputStreamDataOutput out;
|
||||
final long count;
|
||||
private long countWritten;
|
||||
private boolean closed;
|
||||
|
||||
public OfflineWriter(long count) throws IOException {
|
||||
tempFile = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "size" + count + ".", "");
|
||||
out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
|
||||
public OfflineWriter(Directory tempDir, String tempFileNamePrefix, long count) throws IOException {
|
||||
this.tempDir = tempDir;
|
||||
out = tempDir.createTempOutput(tempFileNamePrefix, "bkd3d", IOContext.DEFAULT);
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
|
@ -56,7 +53,7 @@ final class OfflineWriter implements Writer {
|
|||
@Override
|
||||
public Reader getReader(long start) throws IOException {
|
||||
assert closed;
|
||||
return new OfflineReader(tempFile, start, count-start);
|
||||
return new OfflineReader(tempDir, out.getName(), start, count-start);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -70,11 +67,11 @@ final class OfflineWriter implements Writer {
|
|||
|
||||
@Override
|
||||
public void destroy() throws IOException {
|
||||
IOUtils.rm(tempFile);
|
||||
tempDir.deleteFile(out.getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OfflineWriter(count=" + count + " tempFile=" + tempFile + ")";
|
||||
return "OfflineWriter(count=" + count + " tempFileName=" + out.getName() + ")";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,16 @@ package org.apache.lucene.bkdtree3d;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.lucene53.Lucene53Codec;
|
||||
|
@ -50,6 +60,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -58,16 +69,6 @@ import org.junit.BeforeClass;
|
|||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueCenter;
|
||||
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMax;
|
||||
import static org.apache.lucene.bkdtree3d.Geo3DDocValuesFormat.decodeValueMin;
|
||||
|
@ -87,7 +88,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
|
||||
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
|
@ -108,7 +109,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testPlanetModelChanged() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
|
||||
int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
|
@ -137,10 +138,10 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBKDBasic() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
||||
|
||||
BKD3DTreeWriter w = new BKD3DTreeWriter();
|
||||
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d");
|
||||
|
||||
w.add(0, 0, 0, 0);
|
||||
w.add(1, 1, 1, 1);
|
||||
|
@ -245,7 +246,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
public void testBKDRandom() throws Exception {
|
||||
List<Point> points = new ArrayList<>();
|
||||
int numPoints = atLeast(10000);
|
||||
Directory dir = newDirectory();
|
||||
Directory dir = getDirectory();
|
||||
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
||||
int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
|
||||
|
||||
|
@ -254,7 +255,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
PlanetModel planetModel = getPlanetModel();
|
||||
final double planetMax = planetModel.getMaximumMagnitude();
|
||||
|
||||
BKD3DTreeWriter w = new BKD3DTreeWriter(maxPointsInLeaf, maxPointsSortInHeap);
|
||||
BKD3DTreeWriter w = new BKD3DTreeWriter(dir, "bkd3d", maxPointsInLeaf, maxPointsSortInHeap);
|
||||
for(int docID=0;docID<numPoints;docID++) {
|
||||
Point point;
|
||||
if (docID > 0 && random().nextInt(30) == 17) {
|
||||
|
@ -924,7 +925,7 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
if (lats.length > 100000) {
|
||||
dir = newFSDirectory(createTempDir("TestBKDTree"));
|
||||
} else {
|
||||
dir = newDirectory();
|
||||
dir = getDirectory();
|
||||
}
|
||||
Set<Integer> deleted = new HashSet<>();
|
||||
// RandomIndexWriter is too slow here:
|
||||
|
@ -1059,4 +1060,12 @@ public class TestGeo3DPointField extends LuceneTestCase {
|
|||
}
|
||||
IOUtils.close(r, dir);
|
||||
}
|
||||
|
||||
private static Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,14 +18,15 @@ package org.apache.lucene.search.suggest;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -41,12 +42,14 @@ import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
|||
public class SortedInputIterator implements InputIterator {
|
||||
|
||||
private final InputIterator source;
|
||||
private Path tempInput;
|
||||
private Path tempSorted;
|
||||
private IndexOutput tempInput;
|
||||
private String tempSortedFileName;
|
||||
private final ByteSequencesReader reader;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
private final boolean hasPayloads;
|
||||
private final boolean hasContexts;
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
private boolean done = false;
|
||||
|
||||
private long weight;
|
||||
|
@ -58,19 +61,21 @@ public class SortedInputIterator implements InputIterator {
|
|||
* Creates a new sorted wrapper, using {@link
|
||||
* BytesRef#getUTF8SortedAsUnicodeComparator} for
|
||||
* sorting. */
|
||||
public SortedInputIterator(InputIterator source) throws IOException {
|
||||
this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
public SortedInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source) throws IOException {
|
||||
this(tempDir, tempFileNamePrefix, source, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new sorted wrapper, sorting by BytesRef
|
||||
* (ascending) then cost (ascending).
|
||||
*/
|
||||
public SortedInputIterator(InputIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||
public SortedInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source, Comparator<BytesRef> comparator) throws IOException {
|
||||
this.hasPayloads = source.hasPayloads();
|
||||
this.hasContexts = source.hasContexts();
|
||||
this.source = source;
|
||||
this.comparator = comparator;
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
this.reader = sort();
|
||||
}
|
||||
|
||||
|
@ -83,7 +88,7 @@ public class SortedInputIterator implements InputIterator {
|
|||
try {
|
||||
ByteArrayDataInput input = new ByteArrayDataInput();
|
||||
if (reader.read(scratch)) {
|
||||
final BytesRef bytes = scratch.get();
|
||||
final BytesRef bytes = scratch.get();
|
||||
weight = decode(bytes, input);
|
||||
if (hasPayloads) {
|
||||
payload = decodePayload(bytes, input);
|
||||
|
@ -168,10 +173,9 @@ public class SortedInputIterator implements InputIterator {
|
|||
};
|
||||
|
||||
private ByteSequencesReader sort() throws IOException {
|
||||
String prefix = getClass().getSimpleName();
|
||||
Path directory = OfflineSorter.getDefaultTempDir();
|
||||
tempInput = Files.createTempFile(directory, prefix, ".input");
|
||||
tempSorted = Files.createTempFile(directory, prefix, ".sorted");
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, tieBreakByCostComparator);
|
||||
tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
|
||||
|
||||
final OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
boolean success = false;
|
||||
|
@ -184,8 +188,8 @@ public class SortedInputIterator implements InputIterator {
|
|||
encode(writer, output, buffer, spare, source.payload(), source.contexts(), source.weight());
|
||||
}
|
||||
writer.close();
|
||||
new OfflineSorter(tieBreakByCostComparator).sort(tempInput, tempSorted);
|
||||
ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(tempSorted);
|
||||
tempSortedFileName = sorter.sort(tempInput.getName());
|
||||
ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
|
||||
success = true;
|
||||
return reader;
|
||||
|
||||
|
@ -203,16 +207,12 @@ public class SortedInputIterator implements InputIterator {
|
|||
}
|
||||
|
||||
private void close() throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
IOUtils.close(reader);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.deleteFilesIfExist(tempInput, tempSorted);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir,
|
||||
tempInput == null ? null : tempInput.getName(),
|
||||
tempSortedFileName);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,11 +17,7 @@ package org.apache.lucene.search.suggest.analyzing;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -39,6 +35,9 @@ import org.apache.lucene.store.ByteArrayDataInput;
|
|||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -64,6 +63,8 @@ import org.apache.lucene.util.fst.Util;
|
|||
import org.apache.lucene.util.fst.Util.Result;
|
||||
import org.apache.lucene.util.fst.Util.TopResults;
|
||||
|
||||
import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
|
||||
/**
|
||||
* Suggester that first analyzes the surface form, adds the
|
||||
* analyzed form to a weighted FST, and then does the same
|
||||
|
@ -150,14 +151,14 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
private final boolean preserveSep;
|
||||
|
||||
/** Include this flag in the options parameter to {@link
|
||||
* #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to always
|
||||
* #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)} to always
|
||||
* return the exact match first, regardless of score. This
|
||||
* has no performance impact but could result in
|
||||
* low-quality suggestions. */
|
||||
public static final int EXACT_FIRST = 1;
|
||||
|
||||
/** Include this flag in the options parameter to {@link
|
||||
* #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)} to preserve
|
||||
* #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)} to preserve
|
||||
* token separators when matching. */
|
||||
public static final int PRESERVE_SEP = 2;
|
||||
|
||||
|
@ -179,6 +180,9 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
* SynonymFilter). */
|
||||
private final int maxGraphExpansions;
|
||||
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
/** Highest number of analyzed paths we saw for any single
|
||||
* input surface form. For analyzers that never create
|
||||
* graphs this will always be 1. */
|
||||
|
@ -195,21 +199,21 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
private long count = 0;
|
||||
|
||||
/**
|
||||
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
|
||||
* Calls {@link #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)
|
||||
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
|
||||
* PRESERVE_SEP, 256, -1, true)}
|
||||
*/
|
||||
public AnalyzingSuggester(Analyzer analyzer) {
|
||||
this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
|
||||
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer analyzer) {
|
||||
this(tempDir, tempFileNamePrefix, analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean)
|
||||
* Calls {@link #AnalyzingSuggester(Directory,String,Analyzer,Analyzer,int,int,int,boolean)
|
||||
* AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST |
|
||||
* PRESERVE_SEP, 256, -1, true)}
|
||||
*/
|
||||
public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
|
||||
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -230,7 +234,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
* @param preservePositionIncrements Whether position holes
|
||||
* should appear in the automata
|
||||
*/
|
||||
public AnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
public AnalyzingSuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
boolean preservePositionIncrements) {
|
||||
this.indexAnalyzer = indexAnalyzer;
|
||||
this.queryAnalyzer = queryAnalyzer;
|
||||
|
@ -254,6 +258,8 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
}
|
||||
this.maxGraphExpansions = maxGraphExpansions;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
}
|
||||
|
||||
/** Returns byte size of the underlying FST. */
|
||||
|
@ -396,20 +402,21 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
if (iterator.hasContexts()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support contexts");
|
||||
}
|
||||
String prefix = getClass().getSimpleName();
|
||||
Path directory = OfflineSorter.getDefaultTempDir();
|
||||
Path tempInput = Files.createTempFile(directory, prefix, ".input");
|
||||
Path tempSorted = Files.createTempFile(directory, prefix, ".sorted");
|
||||
|
||||
hasPayloads = iterator.hasPayloads();
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, new AnalyzingComparator(hasPayloads));
|
||||
|
||||
IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
|
||||
|
||||
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
OfflineSorter.ByteSequencesReader reader = null;
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
||||
TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
|
||||
|
||||
boolean success = false;
|
||||
String tempSortedFileName = null;
|
||||
|
||||
count = 0;
|
||||
byte buffer[] = new byte[8];
|
||||
try {
|
||||
|
@ -477,12 +484,12 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
writer.close();
|
||||
|
||||
// Sort all input/output pairs (required by FST.Builder):
|
||||
new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
|
||||
tempSortedFileName = sorter.sort(tempInput.getName());
|
||||
|
||||
// Free disk space:
|
||||
Files.delete(tempInput);
|
||||
tempDir.deleteFile(tempInput.getName());
|
||||
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempSorted);
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
|
||||
|
||||
PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
|
||||
Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
@ -570,16 +577,9 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
fst = builder.finish();
|
||||
|
||||
//Util.dotToFile(fst, "/tmp/suggest.dot");
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(reader, writer);
|
||||
|
||||
if (success) {
|
||||
IOUtils.deleteFilesIfExist(tempInput, tempSorted);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.TokenStreamToAutomaton;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // javadocs
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
@ -113,8 +114,8 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
*
|
||||
* @param analyzer the analyzer used for this suggester
|
||||
*/
|
||||
public FuzzySuggester(Analyzer analyzer) {
|
||||
this(analyzer, analyzer);
|
||||
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer analyzer) {
|
||||
this(tempDir, tempFileNamePrefix, analyzer, analyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -125,8 +126,8 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
* @param queryAnalyzer
|
||||
* Analyzer that will be used for analyzing query text during lookup
|
||||
*/
|
||||
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
|
||||
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
|
||||
DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE);
|
||||
}
|
||||
|
||||
|
@ -154,11 +155,11 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
* @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
|
||||
* @param unicodeAware operate on Unicode code points instead of bytes.
|
||||
*/
|
||||
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
|
||||
public FuzzySuggester(Directory tempDir, String tempFileNamePrefix, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
|
||||
int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
boolean preservePositionIncrements, int maxEdits, boolean transpositions,
|
||||
int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
|
||||
super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
|
||||
super(tempDir, tempFileNamePrefix, indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
|
||||
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
|
||||
}
|
||||
|
|
|
@ -19,10 +19,10 @@ package org.apache.lucene.search.suggest.fst;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -34,48 +34,49 @@ import org.apache.lucene.util.OfflineSorter;
|
|||
* @lucene.internal
|
||||
*/
|
||||
public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
||||
private final OfflineSorter sort;
|
||||
private final OfflineSorter sorter;
|
||||
private OfflineSorter.ByteSequencesWriter writer;
|
||||
private Path input;
|
||||
private Path sorted;
|
||||
private IndexOutput input;
|
||||
private String sortedFileName;
|
||||
|
||||
/**
|
||||
* Will buffer all sequences to a temporary file and then sort (all on-disk).
|
||||
*/
|
||||
public ExternalRefSorter(OfflineSorter sort) throws IOException {
|
||||
this.sort = sort;
|
||||
this.input = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "RefSorter-", ".raw");
|
||||
this.writer = new OfflineSorter.ByteSequencesWriter(input);
|
||||
public ExternalRefSorter(OfflineSorter sorter) throws IOException {
|
||||
this.sorter = sorter;
|
||||
this.input = sorter.getDirectory().createTempOutput(sorter.getTempFileNamePrefix(), "RefSorterRaw", IOContext.DEFAULT);
|
||||
this.writer = new OfflineSorter.ByteSequencesWriter(this.input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(BytesRef utf8) throws IOException {
|
||||
if (writer == null) throw new IllegalStateException();
|
||||
if (writer == null) {
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
writer.write(utf8);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRefIterator iterator() throws IOException {
|
||||
if (sorted == null) {
|
||||
if (sortedFileName == null) {
|
||||
closeWriter();
|
||||
|
||||
sorted = Files.createTempFile(OfflineSorter.getDefaultTempDir(), "RefSorter-", ".sorted");
|
||||
boolean success = false;
|
||||
try {
|
||||
sort.sort(input, sorted);
|
||||
sortedFileName = sorter.sort(input.getName());
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
Files.delete(input);
|
||||
sorter.getDirectory().deleteFile(input.getName());
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(input);
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(), input.getName());
|
||||
}
|
||||
}
|
||||
|
||||
input = null;
|
||||
}
|
||||
|
||||
return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorted));
|
||||
return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorter.getDirectory().openInput(sortedFileName, IOContext.READONCE)));
|
||||
}
|
||||
|
||||
private void closeWriter() throws IOException {
|
||||
|
@ -90,16 +91,12 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
|||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
closeWriter();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.deleteFilesIfExist(input, sorted);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(input, sorted);
|
||||
}
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(),
|
||||
input == null ? null : input.getName(),
|
||||
sortedFileName);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -142,6 +139,6 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
|||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return sort.getComparator();
|
||||
return sorter.getComparator();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,8 +18,6 @@ package org.apache.lucene.search.suggest.fst;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -34,6 +32,9 @@ import org.apache.lucene.store.ByteArrayDataInput;
|
|||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -42,7 +43,6 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.SortInfo;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.NoOutputs;
|
||||
|
@ -76,7 +76,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
* An invalid bucket count if we're creating an object
|
||||
* of this class from an existing FST.
|
||||
*
|
||||
* @see #FSTCompletionLookup(FSTCompletion, boolean)
|
||||
* @see #FSTCompletionLookup(Directory, String, FSTCompletion, boolean)
|
||||
*/
|
||||
private static int INVALID_BUCKETS_COUNT = -1;
|
||||
|
||||
|
@ -89,6 +89,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
*/
|
||||
private final static int sharedTailLength = 5;
|
||||
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
private int buckets;
|
||||
private boolean exactMatchFirst;
|
||||
|
||||
|
@ -105,14 +108,21 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
/** Number of entries the lookup was built with */
|
||||
private long count = 0;
|
||||
|
||||
/**
|
||||
* This constructor should only be used to read a previously saved suggester.
|
||||
*/
|
||||
public FSTCompletionLookup() {
|
||||
this(null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor prepares for creating a suggested FST using the
|
||||
* {@link #build(InputIterator)} method. The number of weight
|
||||
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
|
||||
* exact matches are promoted to the top of the suggestions list.
|
||||
*/
|
||||
public FSTCompletionLookup() {
|
||||
this(FSTCompletion.DEFAULT_BUCKETS, true);
|
||||
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix) {
|
||||
this(tempDir, tempFileNamePrefix, FSTCompletion.DEFAULT_BUCKETS, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -128,9 +138,11 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
* suggestions list. Otherwise they appear in the order of
|
||||
* discretized weight and alphabetical within the bucket.
|
||||
*/
|
||||
public FSTCompletionLookup(int buckets, boolean exactMatchFirst) {
|
||||
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, int buckets, boolean exactMatchFirst) {
|
||||
this.buckets = buckets;
|
||||
this.exactMatchFirst = exactMatchFirst;
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,8 +155,8 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
* suggestions list. Otherwise they appear in the order of
|
||||
* discretized weight and alphabetical within the bucket.
|
||||
*/
|
||||
public FSTCompletionLookup(FSTCompletion completion, boolean exactMatchFirst) {
|
||||
this(INVALID_BUCKETS_COUNT, exactMatchFirst);
|
||||
public FSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, FSTCompletion completion, boolean exactMatchFirst) {
|
||||
this(tempDir, tempFileNamePrefix, INVALID_BUCKETS_COUNT, exactMatchFirst);
|
||||
this.normalCompletion = new FSTCompletion(
|
||||
completion.getFST(), false, exactMatchFirst);
|
||||
this.higherWeightsCompletion = new FSTCompletion(
|
||||
|
@ -159,23 +171,23 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
if (iterator.hasContexts()) {
|
||||
throw new IllegalArgumentException("this suggester doesn't support contexts");
|
||||
}
|
||||
Path tempInput = Files.createTempFile(
|
||||
OfflineSorter.getDefaultTempDir(), FSTCompletionLookup.class.getSimpleName(), ".input");
|
||||
Path tempSorted = Files.createTempFile(
|
||||
OfflineSorter.getDefaultTempDir(), FSTCompletionLookup.class.getSimpleName(), ".sorted");
|
||||
|
||||
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
|
||||
ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
|
||||
IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
|
||||
String tempSortedFileName = null;
|
||||
|
||||
OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
|
||||
OfflineSorter.ByteSequencesReader reader = null;
|
||||
ExternalRefSorter sorter = null;
|
||||
|
||||
// Push floats up front before sequences to sort them. For now, assume they are non-negative.
|
||||
// If negative floats are allowed some trickery needs to be done to find their byte order.
|
||||
boolean success = false;
|
||||
count = 0;
|
||||
try {
|
||||
byte [] buffer = new byte [0];
|
||||
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
|
||||
BytesRef spare;
|
||||
int inputLineCount = 0;
|
||||
while ((spare = iterator.next()) != null) {
|
||||
if (spare.length + 4 >= buffer.length) {
|
||||
buffer = ArrayUtil.grow(buffer, spare.length + 4);
|
||||
|
@ -185,18 +197,19 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
output.writeInt(encodeWeight(iterator.weight()));
|
||||
output.writeBytes(spare.bytes, spare.offset, spare.length);
|
||||
writer.write(buffer, 0, output.getPosition());
|
||||
inputLineCount++;
|
||||
}
|
||||
writer.close();
|
||||
|
||||
// We don't know the distribution of scores and we need to bucket them, so we'll sort
|
||||
// and divide into equal buckets.
|
||||
SortInfo info = new OfflineSorter().sort(tempInput, tempSorted);
|
||||
Files.delete(tempInput);
|
||||
FSTCompletionBuilder builder = new FSTCompletionBuilder(
|
||||
buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);
|
||||
tempSortedFileName = sorter.sort(tempInput.getName());
|
||||
tempDir.deleteFile(tempInput.getName());
|
||||
|
||||
final int inputLines = info.lines;
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempSorted);
|
||||
FSTCompletionBuilder builder = new FSTCompletionBuilder(
|
||||
buckets, externalSorter, sharedTailLength);
|
||||
|
||||
reader = new OfflineSorter.ByteSequencesReader(tempDir.openInput(tempSortedFileName, IOContext.READONCE));
|
||||
long line = 0;
|
||||
int previousBucket = 0;
|
||||
int previousScore = 0;
|
||||
|
@ -211,7 +224,7 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
if (line > 0 && currentScore == previousScore) {
|
||||
bucket = previousBucket;
|
||||
} else {
|
||||
bucket = (int) (line * buckets / inputLines);
|
||||
bucket = (int) (line * buckets / inputLineCount);
|
||||
}
|
||||
previousScore = currentScore;
|
||||
previousBucket = bucket;
|
||||
|
@ -231,15 +244,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
this.normalCompletion = new FSTCompletion(
|
||||
higherWeightsCompletion.getFST(), false, exactMatchFirst);
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(reader, writer, sorter);
|
||||
|
||||
if (success) {
|
||||
Files.delete(tempSorted);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
|
||||
}
|
||||
IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -285,8 +292,9 @@ public class FSTCompletionLookup extends Lookup implements Accountable {
|
|||
@Override
|
||||
public synchronized boolean store(DataOutput output) throws IOException {
|
||||
output.writeVLong(count);
|
||||
if (this.normalCompletion == null || normalCompletion.getFST() == null)
|
||||
if (this.normalCompletion == null || normalCompletion.getFST() == null) {
|
||||
return false;
|
||||
}
|
||||
normalCompletion.getFST().save(output);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.store.ByteArrayDataInput;
|
|||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -78,11 +79,14 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
/** Number of entries the lookup was built with */
|
||||
private long count = 0;
|
||||
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
/**
|
||||
* Calls {@link #WFSTCompletionLookup(boolean) WFSTCompletionLookup(true)}
|
||||
* Calls {@link #WFSTCompletionLookup(Directory,String,boolean) WFSTCompletionLookup(tempDir, tempFileNamePrefix, true)}
|
||||
*/
|
||||
public WFSTCompletionLookup() {
|
||||
this(true);
|
||||
public WFSTCompletionLookup(Directory tempDir, String tempFileNamePrefix) {
|
||||
this(tempDir, tempFileNamePrefix, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -93,8 +97,10 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
* of score. This has no performance impact, but could result
|
||||
* in low-quality suggestions.
|
||||
*/
|
||||
public WFSTCompletionLookup(boolean exactFirst) {
|
||||
public WFSTCompletionLookup(Directory tempDir, String tempFileNamePrefix, boolean exactFirst) {
|
||||
this.exactFirst = exactFirst;
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -107,7 +113,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
}
|
||||
count = 0;
|
||||
BytesRef scratch = new BytesRef();
|
||||
InputIterator iter = new WFSTInputIterator(iterator);
|
||||
InputIterator iter = new WFSTInputIterator(tempDir, tempFileNamePrefix, iterator);
|
||||
IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
BytesRefBuilder previous = null;
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
|
@ -264,8 +270,8 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
|
||||
private final class WFSTInputIterator extends SortedInputIterator {
|
||||
|
||||
WFSTInputIterator(InputIterator source) throws IOException {
|
||||
super(source);
|
||||
WFSTInputIterator(Directory tempDir, String tempFileNamePrefix, InputIterator source) throws IOException {
|
||||
super(tempDir, tempFileNamePrefix, source);
|
||||
assert source.hasPayloads() == false;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,11 +27,10 @@ import org.apache.lucene.search.suggest.Lookup;
|
|||
import org.apache.lucene.search.suggest.SortedInputIterator;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
/**
|
||||
* Suggest implementation based on a
|
||||
|
@ -45,12 +44,26 @@ public class TSTLookup extends Lookup {
|
|||
|
||||
/** Number of entries the lookup was built with */
|
||||
private long count = 0;
|
||||
|
||||
private final Directory tempDir;
|
||||
private final String tempFileNamePrefix;
|
||||
|
||||
/**
|
||||
* Creates a new TSTLookup with an empty Ternary Search Tree.
|
||||
* @see #build(InputIterator)
|
||||
*/
|
||||
public TSTLookup() {}
|
||||
public TSTLookup() {
|
||||
this(null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new TSTLookup, for building.
|
||||
* @see #build(InputIterator)
|
||||
*/
|
||||
public TSTLookup(Directory tempDir, String tempFileNamePrefix) {
|
||||
this.tempDir = tempDir;
|
||||
this.tempFileNamePrefix = tempFileNamePrefix;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void build(InputIterator iterator) throws IOException {
|
||||
|
@ -63,7 +76,7 @@ public class TSTLookup extends Lookup {
|
|||
root = new TernaryTreeNode();
|
||||
|
||||
// make sure it's sorted and the comparator uses UTF16 sort order
|
||||
iterator = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
iterator = new SortedInputIterator(tempDir, tempFileNamePrefix, iterator, BytesRef.getUTF8SortedAsUTF16Comparator());
|
||||
count = 0;
|
||||
ArrayList<String> tokens = new ArrayList<>();
|
||||
ArrayList<Number> vals = new ArrayList<>();
|
||||
|
|
|
@ -18,13 +18,15 @@ package org.apache.lucene.search.suggest;
|
|||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Random;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
|
||||
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
|
||||
import org.apache.lucene.search.suggest.tst.TSTLookup;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
@ -58,11 +60,26 @@ public class PersistenceTest extends LuceneTestCase {
|
|||
runTest(FSTCompletionLookup.class, false);
|
||||
}
|
||||
|
||||
private void runTest(Class<? extends Lookup> lookupClass,
|
||||
boolean supportsExactWeights) throws Exception {
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
|
||||
private void runTest(Class<? extends Lookup> lookupClass, boolean supportsExactWeights) throws Exception {
|
||||
|
||||
// Add all input keys.
|
||||
Lookup lookup = lookupClass.newInstance();
|
||||
Lookup lookup;
|
||||
Directory tempDir = getDirectory();
|
||||
if (lookupClass == TSTLookup.class) {
|
||||
lookup = new TSTLookup(tempDir, "suggest");
|
||||
} else if (lookupClass == FSTCompletionLookup.class) {
|
||||
lookup = new FSTCompletionLookup(tempDir, "suggest");
|
||||
} else {
|
||||
lookup = lookupClass.newInstance();
|
||||
}
|
||||
Input[] keys = new Input[this.keys.length];
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
keys[i] = new Input(this.keys[i], i);
|
||||
|
@ -92,5 +109,6 @@ public class PersistenceTest extends LuceneTestCase {
|
|||
previous = lookupResult.value;
|
||||
}
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,18 +26,22 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestInputIterator extends LuceneTestCase {
|
||||
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
InputArrayIterator iterator = new InputArrayIterator(new Input[0]);
|
||||
InputIterator wrapper = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
assertNull(wrapper.next());
|
||||
wrapper = new UnsortedInputIterator(iterator);
|
||||
assertNull(wrapper.next());
|
||||
try (Directory dir = getDirectory()) {
|
||||
InputIterator wrapper = new SortedInputIterator(dir, "sorted", iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
assertNull(wrapper.next());
|
||||
wrapper = new UnsortedInputIterator(iterator);
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
}
|
||||
|
||||
public void testTerms() throws Exception {
|
||||
|
@ -77,45 +81,51 @@ public class TestInputIterator extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// test the sorted iterator wrapper with payloads
|
||||
InputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
|
||||
while (expected.hasNext()) {
|
||||
Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
|
||||
try (Directory tempDir = getDirectory()) {
|
||||
InputIterator wrapper = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsorted), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
|
||||
while (expected.hasNext()) {
|
||||
Map.Entry<BytesRef,SimpleEntry<Long, BytesRef>> entry = expected.next();
|
||||
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
assertEquals(entry.getValue().getValue(), wrapper.payload());
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
assertEquals(entry.getValue().getValue(), wrapper.payload());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
|
||||
// test the sorted iterator wrapper with contexts
|
||||
wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator();
|
||||
while (actualEntries.hasNext()) {
|
||||
Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next();
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
Set<BytesRef> actualCtxs = entry.getValue().getValue();
|
||||
assertEquals(actualCtxs, wrapper.contexts());
|
||||
try (Directory tempDir = getDirectory()) {
|
||||
InputIterator wrapper = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsortedWithContexts), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator();
|
||||
while (actualEntries.hasNext()) {
|
||||
Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next();
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
Set<BytesRef> actualCtxs = entry.getValue().getValue();
|
||||
assertEquals(actualCtxs, wrapper.contexts());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
|
||||
|
||||
// test the sorted iterator wrapper with contexts and payload
|
||||
wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithPayloadAndContext), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.entrySet().iterator();
|
||||
while (expectedPayloadContextEntries.hasNext()) {
|
||||
Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>> entry = expectedPayloadContextEntries.next();
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
Set<BytesRef> actualCtxs = entry.getValue().getValue().getValue();
|
||||
assertEquals(actualCtxs, wrapper.contexts());
|
||||
BytesRef actualPayload = entry.getValue().getValue().getKey();
|
||||
assertEquals(actualPayload, wrapper.payload());
|
||||
try (Directory tempDir = getDirectory()) {
|
||||
InputIterator wrapper = new SortedInputIterator(tempDir, "sorter", new InputArrayIterator(unsortedWithPayloadAndContext), comparator);
|
||||
Iterator<Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>>> expectedPayloadContextEntries = sortedWithPayloadAndContext.entrySet().iterator();
|
||||
while (expectedPayloadContextEntries.hasNext()) {
|
||||
Map.Entry<BytesRef, SimpleEntry<Long, SimpleEntry<BytesRef, Set<BytesRef>>>> entry = expectedPayloadContextEntries.next();
|
||||
assertEquals(entry.getKey(), wrapper.next());
|
||||
assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
|
||||
Set<BytesRef> actualCtxs = entry.getValue().getValue().getValue();
|
||||
assertEquals(actualCtxs, wrapper.contexts());
|
||||
BytesRef actualPayload = entry.getValue().getValue().getKey();
|
||||
assertEquals(actualPayload, wrapper.payload());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
}
|
||||
assertNull(wrapper.next());
|
||||
|
||||
|
||||
// test the unsorted iterator wrapper with payloads
|
||||
wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
|
||||
InputIterator wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
|
||||
TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
|
||||
BytesRef key;
|
||||
while ((key = wrapper.next()) != null) {
|
||||
|
@ -126,19 +136,21 @@ public class TestInputIterator extends LuceneTestCase {
|
|||
assertEquals(sorted, actual);
|
||||
|
||||
// test the sorted iterator wrapper without payloads
|
||||
InputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparator);
|
||||
Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
|
||||
while (expectedWithoutPayload.hasNext()) {
|
||||
Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
|
||||
try (Directory tempDir = getDirectory()) {
|
||||
InputIterator wrapperWithoutPayload = new SortedInputIterator(tempDir, "sorted", new InputArrayIterator(unsortedWithoutPayload), comparator);
|
||||
Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
|
||||
while (expectedWithoutPayload.hasNext()) {
|
||||
Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
|
||||
|
||||
assertEquals(entry.getKey(), wrapperWithoutPayload.next());
|
||||
assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
|
||||
assertNull(wrapperWithoutPayload.payload());
|
||||
assertEquals(entry.getKey(), wrapperWithoutPayload.next());
|
||||
assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
|
||||
assertNull(wrapperWithoutPayload.payload());
|
||||
}
|
||||
assertNull(wrapperWithoutPayload.next());
|
||||
}
|
||||
assertNull(wrapperWithoutPayload.next());
|
||||
|
||||
// test the unsorted iterator wrapper without payloads
|
||||
wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
|
||||
InputIterator wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
|
||||
TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
|
||||
while ((key = wrapperWithoutPayload.next()) != null) {
|
||||
long value = wrapperWithoutPayload.weight();
|
||||
|
@ -157,4 +169,12 @@ public class TestInputIterator extends LuceneTestCase {
|
|||
return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
|
||||
| ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,8 +34,8 @@ import java.util.Set;
|
|||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
|
||||
import org.apache.lucene.analysis.CannedBinaryTokenStream;
|
||||
import org.apache.lucene.analysis.CannedBinaryTokenStream.BinaryToken;
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
|
@ -48,9 +48,11 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.Input;
|
||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
|
@ -58,7 +60,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class AnalyzingSuggesterTest extends LuceneTestCase {
|
||||
|
||||
|
||||
/** This is basically the WFST test ported to KeywordAnalyzer, so it acts the same. */
|
||||
public void testKeyword() throws Exception {
|
||||
Iterable<Input> keys = shuffle(
|
||||
|
@ -71,8 +73,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("barbara", 1)
|
||||
);
|
||||
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
// top N of 2, but only foo is available
|
||||
|
@ -106,7 +110,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals("barbara", results.get(2).key.toString());
|
||||
assertEquals(6, results.get(2).value, 0.01F);
|
||||
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
public void testKeywordWithPayloads() throws Exception {
|
||||
|
@ -119,7 +123,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("barbara", 6, new BytesRef("for all the fish")));
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
for (int i = 0; i < 2; i++) {
|
||||
// top N of 2, but only foo is available
|
||||
|
@ -160,7 +165,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(6, results.get(2).value, 0.01F);
|
||||
assertEquals(new BytesRef("for all the fish"), results.get(2).payload);
|
||||
}
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
public void testRandomRealisticKeys() throws IOException {
|
||||
|
@ -180,7 +185,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
Analyzer indexAnalyzer = new MockAnalyzer(random());
|
||||
Analyzer queryAnalyzer = new MockAnalyzer(random());
|
||||
AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(indexAnalyzer, queryAnalyzer,
|
||||
Directory tempDir = getDirectory();
|
||||
|
||||
AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(tempDir, "suggest", indexAnalyzer, queryAnalyzer,
|
||||
AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, random().nextBoolean());
|
||||
boolean doPayloads = random().nextBoolean();
|
||||
if (doPayloads) {
|
||||
|
@ -205,7 +212,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer);
|
||||
IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer, tempDir);
|
||||
}
|
||||
|
||||
// TODO: more tests
|
||||
|
@ -217,8 +224,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("the ghost of christmas past", 50),
|
||||
};
|
||||
|
||||
Directory tempDir = getDirectory();
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(standard, standard,
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard, standard,
|
||||
AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false);
|
||||
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
@ -240,17 +248,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals("the ghost of christmas past", results.get(0).key.toString());
|
||||
assertEquals(50, results.get(0).value, 0.01F);
|
||||
|
||||
standard.close();
|
||||
IOUtils.close(standard, tempDir);
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", standard);
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
standard.close();
|
||||
IOUtils.close(standard, tempDir);
|
||||
}
|
||||
|
||||
public void testNoSeps() throws Exception {
|
||||
|
@ -262,7 +271,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
int options = 0;
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
// TODO: would be nice if "ab " would allow the test to
|
||||
// pass, and more generally if the analyzer can know
|
||||
|
@ -275,7 +285,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
// complete to "abcd", which has higher weight so should
|
||||
// appear first:
|
||||
assertEquals("abcd", r.get(0).key.toString());
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testGraphDups() throws Exception {
|
||||
|
@ -330,7 +340,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("wi fi network is fast", 10),
|
||||
};
|
||||
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
|
||||
if (VERBOSE) {
|
||||
|
@ -341,7 +352,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(50, results.get(0).value);
|
||||
assertEquals("wi fi network is fast", results.get(1).key);
|
||||
assertEquals(10, results.get(1).value);
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
public void testInputPathRequired() throws Exception {
|
||||
|
@ -396,11 +407,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("ab xc", 50),
|
||||
new Input("ba xd", 50),
|
||||
};
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("ab x", false, 1);
|
||||
assertTrue(results.size() == 1);
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
private static Token token(String term, int posInc, int posLength) {
|
||||
|
@ -471,7 +483,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 1),
|
||||
new Input("x y z", 3),
|
||||
|
@ -505,13 +518,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testNonExactFirst() throws Exception {
|
||||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 1),
|
||||
|
@ -543,7 +557,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
// Holds surface form separately:
|
||||
|
@ -764,7 +778,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a,
|
||||
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, true);
|
||||
if (doPayloads) {
|
||||
suggester.build(new InputArrayIterator(shuffle(payloadKeys)));
|
||||
|
@ -882,12 +897,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 2, -1, true);
|
||||
suggester.build(new InputArrayIterator(shuffle(new Input("a", 40),
|
||||
new Input("a ", 50), new Input(" a", 60))));
|
||||
|
||||
|
@ -897,12 +913,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(60, results.get(0).value);
|
||||
assertEquals("a ", results.get(1).key);
|
||||
assertEquals(50, results.get(1).value);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testQueueExhaustion() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("a", 2),
|
||||
|
@ -912,14 +929,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}));
|
||||
|
||||
suggester.lookup("a", false, 4);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testExactFirstMissingResult() throws Exception {
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("a", 5),
|
||||
|
@ -959,7 +977,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(4, results.get(1).value);
|
||||
assertEquals("a b", results.get(2).key);
|
||||
assertEquals(3, results.get(2).value);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testDupSurfaceFormsMissingResults() throws Exception {
|
||||
|
@ -986,7 +1004,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(shuffle(
|
||||
new Input("hambone", 6),
|
||||
|
@ -1018,7 +1037,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(6, results.get(0).value);
|
||||
assertEquals("nellie", results.get(1).key);
|
||||
assertEquals(5, results.get(1).value);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testDupSurfaceFormsMissingResults2() throws Exception {
|
||||
|
@ -1055,7 +1074,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("a", 6),
|
||||
|
@ -1088,7 +1108,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
assertEquals(6, results.get(0).value);
|
||||
assertEquals("b", results.get(1).key);
|
||||
assertEquals(5, results.get(1).value);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void test0ByteKeys() throws Exception {
|
||||
|
@ -1128,19 +1148,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("a a", 50),
|
||||
new Input("a b", 50),
|
||||
}));
|
||||
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testDupSurfaceFormsMissingResults3() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("a a", 7),
|
||||
new Input("a a", 7),
|
||||
|
@ -1149,19 +1171,20 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
new Input("a b", 5),
|
||||
}));
|
||||
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
|
||||
a.close();
|
||||
IOUtils.close(tempDir, a);
|
||||
}
|
||||
|
||||
public void testEndingSpace() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("i love lucy", 7),
|
||||
new Input("isla de muerta", 8),
|
||||
}));
|
||||
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
|
||||
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testTooManyExpansions() throws Exception {
|
||||
|
@ -1188,15 +1211,17 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true);
|
||||
suggester.build(new InputArrayIterator(new Input[] {new Input("a", 1)}));
|
||||
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testIllegalLookupArgument() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1, true);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("а где Люси?", 7),
|
||||
}));
|
||||
|
@ -1212,7 +1237,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
} catch (IllegalArgumentException e) {
|
||||
// expected
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
static final Iterable<Input> shuffle(Input...values) {
|
||||
|
@ -1227,7 +1252,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
// TODO: we need BaseSuggesterTestCase?
|
||||
public void testTooLongSuggestion() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(a);
|
||||
Directory tempDir = getDirectory();
|
||||
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a);
|
||||
String bigString = TestUtil.randomSimpleString(random(), 30000, 30000);
|
||||
try {
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
|
@ -1238,6 +1264,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,8 +43,11 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
import org.apache.lucene.search.suggest.Input;
|
||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -62,7 +65,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
keys.add(new Input("foo bar boo far", 12));
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
int numIters = atLeast(10);
|
||||
|
@ -73,7 +77,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals("foo bar boo far", results.get(0).key.toString());
|
||||
assertEquals(12, results.get(0).value, 0.01F);
|
||||
}
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
public void testNonLatinRandomEdits() throws IOException {
|
||||
|
@ -84,7 +88,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
keys.add(new Input("фуу бар буу фар", 12));
|
||||
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, true, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
int numIters = atLeast(10);
|
||||
|
@ -95,7 +100,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals("фуу бар буу фар", results.get(0).key.toString());
|
||||
assertEquals(12, results.get(0).value, 0.01F);
|
||||
}
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
|
||||
|
@ -108,7 +113,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
};
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("bariar", random()), false, 2);
|
||||
|
@ -171,7 +177,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals("barbara", results.get(2).key.toString());
|
||||
assertEquals(6, results.get(2).value, 0.01F);
|
||||
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -183,7 +189,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
};
|
||||
|
||||
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
|
||||
FuzzySuggester suggester = new FuzzySuggester(standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", standard, standard, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, false, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
|
||||
FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
|
@ -204,7 +211,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals("the ghost of christmas past", results.get(0).key.toString());
|
||||
assertEquals(50, results.get(0).value, 0.01F);
|
||||
|
||||
standard.close();
|
||||
IOUtils.close(standard, tempDir);
|
||||
}
|
||||
|
||||
public void testNoSeps() throws Exception {
|
||||
|
@ -216,7 +223,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
int options = 0;
|
||||
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, true, 1, true, 1, 3, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",a, a, options, 256, -1, true, 1, true, 1, 3, false);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
// TODO: would be nice if "ab " would allow the test to
|
||||
// pass, and more generally if the analyzer can know
|
||||
|
@ -229,7 +237,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
// complete to "abcd", which has higher weight so should
|
||||
// appear first:
|
||||
assertEquals("abcd", r.get(0).key.toString());
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testGraphDups() throws Exception {
|
||||
|
@ -283,7 +291,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
new Input("wifi network is slow", 50),
|
||||
new Input("wi fi network is fast", 10),
|
||||
};
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
|
||||
|
@ -295,17 +304,18 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals(50, results.get(0).value);
|
||||
assertEquals("wi fi network is fast", results.get(1).key);
|
||||
assertEquals(10, results.get(1).value);
|
||||
analyzer.close();
|
||||
IOUtils.close(tempDir, analyzer);
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
public void testInputPathRequired() throws Exception {
|
||||
|
@ -360,11 +370,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
new Input("ab xc", 50),
|
||||
new Input("ba xd", 50),
|
||||
};
|
||||
FuzzySuggester suggester = new FuzzySuggester(analyzer);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", analyzer);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
List<LookupResult> results = suggester.lookup("ab x", false, 1);
|
||||
assertTrue(results.size() == 1);
|
||||
analyzer.close();
|
||||
IOUtils.close(analyzer, tempDir);
|
||||
}
|
||||
|
||||
private static Token token(String term, int posInc, int posLength) {
|
||||
|
@ -430,7 +441,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
public void testExactFirst() throws Exception {
|
||||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 1),
|
||||
new Input("x y z", 3),
|
||||
|
@ -464,13 +476,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testNonExactFirst() throws Exception {
|
||||
|
||||
Analyzer a = getUnusualAnalyzer();
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true, 1, true, 1, 3, false);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 1),
|
||||
|
@ -502,7 +515,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
// Holds surface form separately:
|
||||
|
@ -697,7 +710,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a,
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy",a, a,
|
||||
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, true, 1, false, 1, 3, unicodeAware);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
|
@ -836,12 +850,13 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
|
||||
}
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, true, 1, true, 1, 3, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 2, -1, true, 1, true, 1, 3, false);
|
||||
|
||||
List<Input> keys = Arrays.asList(new Input[] {
|
||||
new Input("a", 40),
|
||||
|
@ -858,12 +873,13 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals(60, results.get(0).value);
|
||||
assertEquals("a ", results.get(1).key);
|
||||
assertEquals(50, results.get(1).value);
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
public void testEditSeps() throws Exception {
|
||||
Analyzer a = new MockAnalyzer(random());
|
||||
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);
|
||||
|
||||
List<Input> keys = Arrays.asList(new Input[] {
|
||||
new Input("foo bar", 40),
|
||||
|
@ -879,7 +895,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
|
||||
assertEquals("[barbaz/60, barbazfoo/10]", suggester.lookup("bar baz", false, 5).toString());
|
||||
assertEquals("[barbazfoo/10]", suggester.lookup("bar baz foo", false, 5).toString());
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
@SuppressWarnings("fallthrough")
|
||||
|
@ -978,7 +994,8 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
boolean transpositions = random().nextBoolean();
|
||||
// TODO: test graph analyzers
|
||||
// TODO: test exactFirst / preserveSep permutations
|
||||
FuzzySuggester suggest = new FuzzySuggester(a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
|
||||
Directory tempDir = getDirectory();
|
||||
FuzzySuggester suggest = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
|
||||
|
@ -1022,7 +1039,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
assertEquals(expected.size(), actual.size());
|
||||
}
|
||||
a.close();
|
||||
IOUtils.close(a, tempDir);
|
||||
}
|
||||
|
||||
private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
|
||||
|
@ -1194,4 +1211,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
}
|
||||
return ref;
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,8 +18,11 @@ package org.apache.lucene.search.suggest.fst;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.search.suggest.InMemorySorter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.junit.Test;
|
||||
|
@ -27,9 +30,13 @@ import org.junit.Test;
|
|||
public class BytesRefSortersTest extends LuceneTestCase {
|
||||
@Test
|
||||
public void testExternalRefSorter() throws Exception {
|
||||
ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter());
|
||||
Directory tempDir = newDirectory();
|
||||
if (tempDir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) tempDir).setEnableVirusScanner(false);
|
||||
}
|
||||
ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter(tempDir, "temp"));
|
||||
check(s);
|
||||
s.close();
|
||||
IOUtils.close(s, tempDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -20,15 +20,18 @@ package org.apache.lucene.search.suggest.fst;
|
|||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.*;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.fst.FSTCompletion.Completion;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.*;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link FSTCompletion}.
|
||||
*/
|
||||
public class FSTCompletionTest extends LuceneTestCase {
|
||||
|
||||
public static Input tf(String t, int v) {
|
||||
return new Input(t, v);
|
||||
}
|
||||
|
@ -155,7 +158,8 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testLargeInputConstantWeights() throws Exception {
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
|
||||
Directory tempDir = getDirectory();
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst", 10, true);
|
||||
|
||||
Random r = random();
|
||||
List<Input> keys = new ArrayList<>();
|
||||
|
@ -175,12 +179,14 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
}
|
||||
previous = current;
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testMultilingualInput() throws Exception {
|
||||
List<Input> input = LookupBenchmarkTest.readTop50KWiki();
|
||||
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
||||
Directory tempDir = getDirectory();
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
|
||||
lookup.build(new InputArrayIterator(input));
|
||||
assertEquals(input.size(), lookup.getCount());
|
||||
for (Input tf : input) {
|
||||
|
@ -192,6 +198,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
assertEquals(5, result.size());
|
||||
assertTrue(result.get(0).key.toString().equals("wit")); // exact match.
|
||||
assertTrue(result.get(1).key.toString().equals("with")); // highest count.
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testEmptyInput() throws Exception {
|
||||
|
@ -207,7 +214,8 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
freqs.add(new Input("" + rnd.nextLong(), weight));
|
||||
}
|
||||
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup();
|
||||
Directory tempDir = getDirectory();
|
||||
FSTCompletionLookup lookup = new FSTCompletionLookup(tempDir, "fst");
|
||||
lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
|
||||
|
||||
for (Input tf : freqs) {
|
||||
|
@ -219,6 +227,7 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
private CharSequence stringToCharSequence(String prefix) {
|
||||
|
@ -262,4 +271,12 @@ public class FSTCompletionTest extends LuceneTestCase {
|
|||
len = Math.max(len, s.length());
|
||||
return len;
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
package org.apache.lucene.search.suggest.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
/**
|
||||
* Try to build a suggester from a large data set. The input is a simple text
|
||||
* file, newline-delimited.
|
||||
*/
|
||||
public class LargeInputFST {
|
||||
public static void main(String[] args) throws IOException {
|
||||
Path input = Paths.get("/home/dweiss/tmp/shuffled.dict");
|
||||
|
||||
int buckets = 20;
|
||||
int shareMaxTail = 10;
|
||||
|
||||
ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
|
||||
FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);
|
||||
|
||||
BufferedReader reader = Files.newBufferedReader(input, StandardCharsets.UTF_8);
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
scratch.copyChars(line);
|
||||
builder.add(scratch.get(), count % buckets);
|
||||
if ((count++ % 100000) == 0) {
|
||||
System.err.println("Line: " + count);
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("Building FSTCompletion.");
|
||||
FSTCompletion completion = builder.build();
|
||||
|
||||
Path fstFile = Paths.get("completion.fst");
|
||||
System.out.println("Done. Writing automaton: " + fstFile.toAbsolutePath());
|
||||
completion.getFST().save(fstFile);
|
||||
sorter.close();
|
||||
}
|
||||
}
|
|
@ -19,9 +19,11 @@ package org.apache.lucene.search.suggest.fst;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.Input;
|
||||
import org.apache.lucene.search.suggest.InputArrayIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -37,7 +39,8 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
};
|
||||
|
||||
Random random = new Random(random().nextLong());
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst");
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
// top N of 2, but only foo is available
|
||||
|
@ -75,11 +78,13 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
assertEquals(10, results.get(1).value, 0.01F);
|
||||
assertEquals("barbara", results.get(2).key.toString());
|
||||
assertEquals(6, results.get(2).value, 0.01F);
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testExactFirst() throws Exception {
|
||||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", true);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 20),
|
||||
|
@ -99,11 +104,13 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
assertEquals(20, results.get(1).value);
|
||||
}
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testNonExactFirst() throws Exception {
|
||||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input("x y", 20),
|
||||
|
@ -123,6 +130,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
assertEquals(2, results.get(1).value);
|
||||
}
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
|
@ -153,7 +161,8 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
keys[i] = new Input(s, weight);
|
||||
}
|
||||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
|
||||
suggester.build(new InputArrayIterator(keys));
|
||||
|
||||
assertEquals(numWords, suggester.getCount());
|
||||
|
@ -196,6 +205,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
|
||||
}
|
||||
}
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void test0ByteKeys() throws Exception {
|
||||
|
@ -204,20 +214,32 @@ public class WFSTCompletionTest extends LuceneTestCase {
|
|||
BytesRef key2 = new BytesRef(3);
|
||||
key1.length = 3;
|
||||
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input(key1, 50),
|
||||
new Input(key2, 50),
|
||||
}));
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
|
||||
Directory tempDir = getDirectory();
|
||||
WFSTCompletionLookup suggester = new WFSTCompletionLookup(tempDir, "wfst", false);
|
||||
|
||||
suggester.build(new InputArrayIterator(new Input[0]));
|
||||
assertEquals(0, suggester.getCount());
|
||||
List<LookupResult> result = suggester.lookup("a", false, 20);
|
||||
assertTrue(result.isEmpty());
|
||||
tempDir.close();
|
||||
}
|
||||
|
||||
private Directory getDirectory() {
|
||||
Directory dir = newDirectory();
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||
}
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,9 +23,12 @@ import java.io.IOException;
|
|||
import java.nio.file.Files;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.zip.CRC32;
|
||||
|
@ -1165,5 +1168,26 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
|
|||
in.close(); // close again
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testCreateTempOutput() throws Throwable {
|
||||
Directory dir = getDirectory(createTempDir());
|
||||
List<String> names = new ArrayList<>();
|
||||
int iters = atLeast(50);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
IndexOutput out = dir.createTempOutput("foo", "bar", newIOContext(random()));
|
||||
names.add(out.getName());
|
||||
out.writeVInt(iter);
|
||||
out.close();
|
||||
}
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
IndexInput in = dir.openInput(names.get(iter), newIOContext(random()));
|
||||
assertEquals(iter, in.readVInt());
|
||||
in.close();
|
||||
}
|
||||
Set<String> files = new HashSet<String>(Arrays.asList(dir.listAll()));
|
||||
// In case ExtraFS struck:
|
||||
files.remove("extra0");
|
||||
assertEquals(new HashSet<String>(names), files);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public class MockIndexOutputWrapper extends IndexOutput {
|
|||
|
||||
/** Construct an empty output buffer. */
|
||||
public MockIndexOutputWrapper(MockDirectoryWrapper dir, IndexOutput delegate, String name) {
|
||||
super("MockIndexOutputWrapper(" + delegate + ")");
|
||||
super("MockIndexOutputWrapper(" + delegate + ")", delegate.getName());
|
||||
this.dir = dir;
|
||||
this.name = name;
|
||||
this.delegate = delegate;
|
||||
|
|
|
@ -1,5 +1,22 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.FileSystem;
|
||||
|
@ -30,23 +47,6 @@ import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
|
|||
import com.carrotsearch.randomizedtesting.RandomizedContext;
|
||||
import com.carrotsearch.randomizedtesting.rules.TestRuleAdapter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Checks and cleans up temporary files.
|
||||
*
|
||||
|
@ -113,9 +113,6 @@ final class TestRuleTemporaryFilesCleanup extends TestRuleAdapter {
|
|||
assert tempDirBase == null;
|
||||
fileSystem = initializeFileSystem();
|
||||
javaTempDir = initializeJavaTempDir();
|
||||
|
||||
// So all code using OfflineSorter (suggesters, BKD tree, NumericRangeTree) see MockFS goodness, e.g. catching leaked file handles:
|
||||
OfflineSorter.setDefaultTempDir(javaTempDir);
|
||||
}
|
||||
|
||||
// os/config-independent limit for too many open files
|
||||
|
@ -236,7 +233,7 @@ final class TestRuleTemporaryFilesCleanup extends TestRuleAdapter {
|
|||
}
|
||||
}
|
||||
|
||||
final Path getPerTestClassTempDir() {
|
||||
Path getPerTestClassTempDir() {
|
||||
if (tempDirBase == null) {
|
||||
RandomizedContext ctx = RandomizedContext.current();
|
||||
Class<?> clazz = ctx.getTargetClass();
|
||||
|
|
|
@ -16,6 +16,7 @@ package org.apache.lucene.util;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
@ -59,7 +60,7 @@ public class ThrottledIndexOutput extends IndexOutput {
|
|||
public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis,
|
||||
long closeDelayMillis, long seekDelayMillis, long minBytesWritten,
|
||||
IndexOutput delegate) {
|
||||
super("ThrottledIndexOutput(" + delegate + ")");
|
||||
super("ThrottledIndexOutput(" + delegate + ")", delegate == null ? "n/a" : delegate.getName());
|
||||
assert bytesPerSecond > 0;
|
||||
this.delegate = delegate;
|
||||
this.bytesPerSecond = bytesPerSecond;
|
||||
|
@ -117,8 +118,9 @@ public class ThrottledIndexOutput extends IndexOutput {
|
|||
}
|
||||
|
||||
private static final void sleep(long ms) {
|
||||
if (ms <= 0)
|
||||
if (ms <= 0) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(ms);
|
||||
} catch (InterruptedException e) {
|
||||
|
|
|
@ -17,7 +17,11 @@ package org.apache.solr.spelling.suggest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
|
||||
|
@ -41,4 +45,23 @@ public abstract class LookupFactory {
|
|||
* <b>NOTE:</b> not all {@link Lookup} implementations store in-memory data structures
|
||||
* */
|
||||
public abstract String storeFileName();
|
||||
|
||||
/** Non-null if this suggester created a temp dir, needed only during build */
|
||||
private static FSDirectory tmpBuildDir;
|
||||
|
||||
protected static synchronized FSDirectory getTempDir() {
|
||||
if (tmpBuildDir == null) {
|
||||
// Lazy init
|
||||
String tempDirPath = System.getProperty("java.io.tmpdir");
|
||||
if (tempDirPath == null) {
|
||||
throw new RuntimeException("Java has no temporary folder property (java.io.tmpdir)?");
|
||||
}
|
||||
try {
|
||||
tmpBuildDir = FSDirectory.open(Paths.get(tempDirPath));
|
||||
} catch (IOException ioe) {
|
||||
throw new RuntimeException(ioe);
|
||||
}
|
||||
}
|
||||
return tmpBuildDir;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -120,8 +120,7 @@ public class AnalyzingLookupFactory extends LookupFactory {
|
|||
? Boolean.valueOf(params.get(PRESERVE_POSITION_INCREMENTS).toString())
|
||||
: false;
|
||||
|
||||
|
||||
return new AnalyzingSuggester(indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm,
|
||||
return new AnalyzingSuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, flags, maxSurfaceFormsPerAnalyzedForm,
|
||||
maxGraphExpansions, preservePositionIncrements);
|
||||
}
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ public class FSTLookupFactory extends LookupFactory {
|
|||
? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
|
||||
: true;
|
||||
|
||||
return new FSTCompletionLookup(buckets, exactMatchFirst);
|
||||
return new FSTCompletionLookup(getTempDir(), "suggester", buckets, exactMatchFirst);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -134,7 +134,7 @@ public class FuzzyLookupFactory extends LookupFactory {
|
|||
? Boolean.valueOf(params.get(UNICODE_AWARE).toString())
|
||||
: FuzzySuggester.DEFAULT_UNICODE_AWARE;
|
||||
|
||||
return new FuzzySuggester(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm,
|
||||
return new FuzzySuggester(getTempDir(), "suggester", indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm,
|
||||
maxGraphExpansions, preservePositionIncrements, maxEdits, transpositions, nonFuzzyPrefix,
|
||||
minFuzzyLength, unicodeAware);
|
||||
}
|
||||
|
|
|
@ -17,8 +17,6 @@ package org.apache.solr.spelling.suggest.fst;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.fst.*;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -48,7 +46,7 @@ public class WFSTLookupFactory extends LookupFactory {
|
|||
? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
|
||||
: true;
|
||||
|
||||
return new WFSTCompletionLookup(exactMatchFirst);
|
||||
return new WFSTCompletionLookup(getTempDir(), "suggester", exactMatchFirst);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -31,7 +31,7 @@ public class TSTLookupFactory extends LookupFactory {
|
|||
|
||||
@Override
|
||||
public Lookup create(NamedList params, SolrCore core) {
|
||||
return new TSTLookup();
|
||||
return new TSTLookup(getTempDir(), "suggester");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -37,7 +37,7 @@ public class CachedIndexOutput extends ReusedBufferedIndexOutput {
|
|||
|
||||
public CachedIndexOutput(BlockDirectory directory, IndexOutput dest,
|
||||
int blockSize, String name, Cache cache, int bufferSize) {
|
||||
super("dest=" + dest + " name=" + name, bufferSize);
|
||||
super("dest=" + dest + " name=" + name, name, bufferSize);
|
||||
this.directory = directory;
|
||||
this.dest = dest;
|
||||
this.blockSize = blockSize;
|
||||
|
|
|
@ -43,12 +43,12 @@ public abstract class ReusedBufferedIndexOutput extends IndexOutput {
|
|||
|
||||
private final Store store;
|
||||
|
||||
public ReusedBufferedIndexOutput(String resourceDescription) {
|
||||
this(resourceDescription, BUFFER_SIZE);
|
||||
public ReusedBufferedIndexOutput(String resourceDescription, String name) {
|
||||
this(resourceDescription, name, BUFFER_SIZE);
|
||||
}
|
||||
|
||||
public ReusedBufferedIndexOutput(String resourceDescription, int bufferSize) {
|
||||
super(resourceDescription);
|
||||
public ReusedBufferedIndexOutput(String resourceDescription, String name, int bufferSize) {
|
||||
super(resourceDescription, name);
|
||||
checkBufferSize(bufferSize);
|
||||
this.bufferSize = bufferSize;
|
||||
store = BufferStore.instance(bufferSize);
|
||||
|
|
|
@ -107,7 +107,12 @@ public class HdfsDirectory extends BaseDirectory {
|
|||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
return new HdfsFileWriter(getFileSystem(), new Path(hdfsDirPath, name));
|
||||
return new HdfsFileWriter(getFileSystem(), new Path(hdfsDirPath, name), name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
private String[] getNormalNames(List<String> files) {
|
||||
|
|
|
@ -37,8 +37,8 @@ public class HdfsFileWriter extends OutputStreamIndexOutput {
|
|||
public static final String HDFS_SYNC_BLOCK = "solr.hdfs.sync.block";
|
||||
public static final int BUFFER_SIZE = 16384;
|
||||
|
||||
public HdfsFileWriter(FileSystem fileSystem, Path path) throws IOException {
|
||||
super("fileSystem=" + fileSystem + " path=" + path, getOutputStream(fileSystem, path), BUFFER_SIZE);
|
||||
public HdfsFileWriter(FileSystem fileSystem, Path path, String name) throws IOException {
|
||||
super("fileSystem=" + fileSystem + " path=" + path, name, getOutputStream(fileSystem, path), BUFFER_SIZE);
|
||||
}
|
||||
|
||||
private static final OutputStream getOutputStream(FileSystem fileSystem, Path path) throws IOException {
|
||||
|
|
Loading…
Reference in New Issue