mirror of https://github.com/apache/lucene.git
SOLR-2375 Store & Load functionality for Suggester Lookup implementations.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1075804 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
21355a452b
commit
e71e682352
|
@ -175,6 +175,10 @@ Other Changes
|
|||
using Generics where applicable in method/object declarations, and
|
||||
adding @SuppressWarnings("unchecked") when appropriate (hossman)
|
||||
|
||||
* SOLR-2375: Suggester Lookup implementations now store trie data
|
||||
and load it back on init. This means that large tries don't have to be
|
||||
rebuilt on every commit or core reload. (ab)
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -83,6 +83,8 @@ public class Suggester extends SolrSpellChecker {
|
|||
if (lookupImpl == null) {
|
||||
lookupImpl = JaspellLookup.class.getName();
|
||||
}
|
||||
lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
|
||||
lookup.init(config, core);
|
||||
String store = (String)config.get(STORE_DIR);
|
||||
if (store != null) {
|
||||
storeDir = new File(store);
|
||||
|
@ -91,6 +93,13 @@ public class Suggester extends SolrSpellChecker {
|
|||
}
|
||||
if (!storeDir.exists()) {
|
||||
storeDir.mkdirs();
|
||||
} else {
|
||||
// attempt reload of the stored lookup
|
||||
try {
|
||||
lookup.load(storeDir);
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Loading stored lookup data failed", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return name;
|
||||
|
@ -107,17 +116,17 @@ public class Suggester extends SolrSpellChecker {
|
|||
dictionary = new FileDictionary(new InputStreamReader(
|
||||
core.getResourceLoader().openResource(sourceLocation), "UTF-8"));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
// should not happen
|
||||
LOG.error("should not happen", e);
|
||||
}
|
||||
}
|
||||
lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
|
||||
try {
|
||||
lookup.build(dictionary);
|
||||
if (storeDir != null) {
|
||||
lookup.store(storeDir);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
LOG.error("Error while building or storing Suggester data", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
package org.apache.solr.spelling.suggest.jaspell;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -9,6 +13,7 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.spelling.suggest.Lookup;
|
||||
import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
|
||||
import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
|
||||
import org.apache.solr.util.SortedIterator;
|
||||
import org.apache.solr.util.TermFreqIterator;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -16,7 +21,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
public class JaspellLookup extends Lookup {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
|
||||
JaspellTernarySearchTrie trie;
|
||||
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
|
||||
private boolean usePrefix = true;
|
||||
private int editDistance = 2;
|
||||
|
||||
|
@ -89,14 +94,89 @@ public class JaspellLookup extends Lookup {
|
|||
return res;
|
||||
}
|
||||
|
||||
public static final String FILENAME = "jaspell.dat";
|
||||
private static final byte LO_KID = 0x01;
|
||||
private static final byte EQ_KID = 0x02;
|
||||
private static final byte HI_KID = 0x04;
|
||||
private static final byte HAS_VALUE = 0x08;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean load(File storeDir) throws IOException {
|
||||
return false;
|
||||
File data = new File(storeDir, FILENAME);
|
||||
if (!data.exists() || !data.canRead()) {
|
||||
return false;
|
||||
}
|
||||
DataInputStream in = new DataInputStream(new FileInputStream(data));
|
||||
TSTNode root = trie.new TSTNode('\0', null);
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
trie.setRoot(root);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
|
||||
node.splitchar = in.readChar();
|
||||
byte mask = in.readByte();
|
||||
if ((mask & HAS_VALUE) != 0) {
|
||||
node.data = new Float(in.readFloat());
|
||||
}
|
||||
if ((mask & LO_KID) != 0) {
|
||||
TSTNode kid = trie.new TSTNode('\0', node);
|
||||
node.relatives[TSTNode.LOKID] = kid;
|
||||
readRecursively(in, kid);
|
||||
}
|
||||
if ((mask & EQ_KID) != 0) {
|
||||
TSTNode kid = trie.new TSTNode('\0', node);
|
||||
node.relatives[TSTNode.EQKID] = kid;
|
||||
readRecursively(in, kid);
|
||||
}
|
||||
if ((mask & HI_KID) != 0) {
|
||||
TSTNode kid = trie.new TSTNode('\0', node);
|
||||
node.relatives[TSTNode.HIKID] = kid;
|
||||
readRecursively(in, kid);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean store(File storeDir) throws IOException {
|
||||
return false;
|
||||
if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
|
||||
return false;
|
||||
}
|
||||
TSTNode root = trie.getRoot();
|
||||
if (root == null) { // empty tree
|
||||
return false;
|
||||
}
|
||||
File data = new File(storeDir, FILENAME);
|
||||
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
|
||||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
out.writeChar(node.splitchar);
|
||||
byte mask = 0;
|
||||
if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
|
||||
if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
|
||||
if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
|
||||
if (node.data != null) mask |= HAS_VALUE;
|
||||
out.writeByte(mask);
|
||||
if (node.data != null) {
|
||||
out.writeFloat((Float)node.data);
|
||||
}
|
||||
writeRecursively(out, node.relatives[TSTNode.LOKID]);
|
||||
writeRecursively(out, node.relatives[TSTNode.EQKID]);
|
||||
writeRecursively(out, node.relatives[TSTNode.HIKID]);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -127,6 +127,16 @@ public class JaspellTernarySearchTrie {
|
|||
*/
|
||||
public JaspellTernarySearchTrie() {
|
||||
}
|
||||
|
||||
// for loading
|
||||
void setRoot(TSTNode newRoot) {
|
||||
rootNode = newRoot;
|
||||
}
|
||||
|
||||
// for saving
|
||||
TSTNode getRoot() {
|
||||
return rootNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a Ternary Search Trie and loads data from a <code>File</code>
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
package org.apache.solr.spelling.suggest.tst;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -13,8 +17,8 @@ import org.apache.solr.util.SortedIterator;
|
|||
import org.apache.solr.util.TermFreqIterator;
|
||||
|
||||
public class TSTLookup extends Lookup {
|
||||
TernaryTreeNode root;
|
||||
TSTAutocomplete autocomplete;
|
||||
TernaryTreeNode root = new TernaryTreeNode();
|
||||
TSTAutocomplete autocomplete = new TSTAutocomplete();
|
||||
|
||||
@Override
|
||||
public void init(NamedList config, SolrCore core) {
|
||||
|
@ -23,7 +27,6 @@ public class TSTLookup extends Lookup {
|
|||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
root = new TernaryTreeNode();
|
||||
autocomplete = new TSTAutocomplete();
|
||||
// buffer first
|
||||
if (!(tfit instanceof SortedIterator)) {
|
||||
// make sure it's sorted
|
||||
|
@ -48,7 +51,16 @@ public class TSTLookup extends Lookup {
|
|||
|
||||
@Override
|
||||
public Object get(String key) {
|
||||
throw new UnsupportedOperationException("get() is not supported here");
|
||||
List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
|
||||
if (list == null || list.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
for (TernaryTreeNode n : list) {
|
||||
if (n.token.equals(key)) {
|
||||
return n.val;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,15 +87,94 @@ public class TSTLookup extends Lookup {
|
|||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public static final String FILENAME = "tst.dat";
|
||||
|
||||
private static final byte LO_KID = 0x01;
|
||||
private static final byte EQ_KID = 0x02;
|
||||
private static final byte HI_KID = 0x04;
|
||||
private static final byte HAS_TOKEN = 0x08;
|
||||
private static final byte HAS_VALUE = 0x10;
|
||||
|
||||
@Override
|
||||
public boolean load(File storeDir) throws IOException {
|
||||
return false;
|
||||
public synchronized boolean load(File storeDir) throws IOException {
|
||||
File data = new File(storeDir, FILENAME);
|
||||
if (!data.exists() || !data.canRead()) {
|
||||
return false;
|
||||
}
|
||||
DataInputStream in = new DataInputStream(new FileInputStream(data));
|
||||
root = new TernaryTreeNode();
|
||||
try {
|
||||
readRecursively(in, root);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// pre-order traversal
|
||||
private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException {
|
||||
node.splitchar = in.readChar();
|
||||
byte mask = in.readByte();
|
||||
if ((mask & HAS_TOKEN) != 0) {
|
||||
node.token = in.readUTF();
|
||||
}
|
||||
if ((mask & HAS_VALUE) != 0) {
|
||||
node.val = new Float(in.readFloat());
|
||||
}
|
||||
if ((mask & LO_KID) != 0) {
|
||||
node.loKid = new TernaryTreeNode();
|
||||
readRecursively(in, node.loKid);
|
||||
}
|
||||
if ((mask & EQ_KID) != 0) {
|
||||
node.eqKid = new TernaryTreeNode();
|
||||
readRecursively(in, node.eqKid);
|
||||
}
|
||||
if ((mask & HI_KID) != 0) {
|
||||
node.hiKid = new TernaryTreeNode();
|
||||
readRecursively(in, node.hiKid);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean store(File storeDir) throws IOException {
|
||||
return false;
|
||||
public synchronized boolean store(File storeDir) throws IOException {
|
||||
if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
|
||||
return false;
|
||||
}
|
||||
File data = new File(storeDir, FILENAME);
|
||||
DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
|
||||
try {
|
||||
writeRecursively(out, root);
|
||||
out.flush();
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// pre-order traversal
|
||||
private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException {
|
||||
// write out the current node
|
||||
out.writeChar(node.splitchar);
|
||||
// prepare a mask of kids
|
||||
byte mask = 0;
|
||||
if (node.eqKid != null) mask |= EQ_KID;
|
||||
if (node.loKid != null) mask |= LO_KID;
|
||||
if (node.hiKid != null) mask |= HI_KID;
|
||||
if (node.token != null) mask |= HAS_TOKEN;
|
||||
if (node.val != null) mask |= HAS_VALUE;
|
||||
out.writeByte(mask);
|
||||
if (node.token != null) out.writeUTF(node.token);
|
||||
if (node.val != null) out.writeFloat((Float)node.val);
|
||||
// recurse and write kids
|
||||
if (node.loKid != null) {
|
||||
writeRecursively(out, node.loKid);
|
||||
}
|
||||
if (node.eqKid != null) {
|
||||
writeRecursively(out, node.eqKid);
|
||||
}
|
||||
if (node.hiKid != null) {
|
||||
writeRecursively(out, node.hiKid);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
|
||||
<str name="lookupImpl">org.apache.solr.spelling.suggest.jaspell.JaspellLookup</str>
|
||||
<str name="field">suggest</str>
|
||||
<str name="storeDir">suggest</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
|
||||
<!-- Suggester properties -->
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.spelling.suggest;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
|
||||
import org.apache.solr.spelling.suggest.tst.TSTLookup;
|
||||
import org.junit.Test;
|
||||
|
||||
public class PersistenceTest extends SolrTestCaseJ4 {
|
||||
|
||||
public static final String[] keys = new String[] {
|
||||
"one",
|
||||
"two",
|
||||
"three",
|
||||
"four",
|
||||
"oneness",
|
||||
"onerous",
|
||||
"onesimus",
|
||||
"twofold",
|
||||
"twonk",
|
||||
"thrive",
|
||||
"through",
|
||||
"threat",
|
||||
"foundation",
|
||||
"fourier",
|
||||
"fourty"
|
||||
};
|
||||
|
||||
@Test
|
||||
public void testTSTPersistence() throws Exception {
|
||||
TSTLookup lookup = new TSTLookup();
|
||||
for (String k : keys) {
|
||||
lookup.add(k, new Float(k.length()));
|
||||
}
|
||||
File storeDir = new File(TEST_HOME);
|
||||
lookup.store(storeDir);
|
||||
lookup = new TSTLookup();
|
||||
lookup.load(storeDir);
|
||||
for (String k : keys) {
|
||||
Float val = (Float)lookup.get(k);
|
||||
assertNotNull(k, val);
|
||||
assertEquals(k, k.length(), val.intValue());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJaspellPersistence() throws Exception {
|
||||
JaspellLookup lookup = new JaspellLookup();
|
||||
for (String k : keys) {
|
||||
lookup.add(k, new Float(k.length()));
|
||||
}
|
||||
File storeDir = new File(TEST_HOME);
|
||||
lookup.store(storeDir);
|
||||
lookup = new JaspellLookup();
|
||||
lookup.load(storeDir);
|
||||
for (String k : keys) {
|
||||
Float val = (Float)lookup.get(k);
|
||||
assertNotNull(k, val);
|
||||
assertEquals(k, k.length(), val.intValue());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -27,6 +27,7 @@ import org.apache.solr.util.TermFreqIterator;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -65,6 +66,29 @@ public class SuggesterTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testReload() throws Exception {
|
||||
String leaveData = System.getProperty("solr.test.leavedatadir");
|
||||
if (leaveData == null) leaveData = "";
|
||||
System.setProperty("solr.test.leavedatadir", "true");
|
||||
addDocs();
|
||||
assertU(commit());
|
||||
File data = dataDir;
|
||||
String config = configString;
|
||||
deleteCore();
|
||||
dataDir = data;
|
||||
configString = config;
|
||||
initCore();
|
||||
assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
|
||||
"//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
|
||||
);
|
||||
|
||||
// restore the property
|
||||
System.setProperty("solr.test.leavedatadir", leaveData);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRebuild() throws Exception {
|
||||
addDocs();
|
||||
assertU(commit());
|
||||
assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
|
||||
|
|
Loading…
Reference in New Issue