mirror of https://github.com/apache/lucene.git
SOLR-351: external file value source
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@587098 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fc96a325ed
commit
2d07595bc6
|
@ -139,6 +139,10 @@ New Features
|
||||||
26. SOLR-334L Pluggable query parsers. Allows specification of query
|
26. SOLR-334L Pluggable query parsers. Allows specification of query
|
||||||
type and arguments as a prefix on a query string. (yonik)
|
type and arguments as a prefix on a query string. (yonik)
|
||||||
|
|
||||||
|
27. SOLR-351L External Value Source. An external file may be used
|
||||||
|
to specify the values of a field, currently usable as
|
||||||
|
a ValueSource in a FunctionQuery. (yonik)
|
||||||
|
|
||||||
Changes in runtime behavior
|
Changes in runtime behavior
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.SortField;
|
||||||
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.solr.search.function.ValueSource;
|
||||||
|
import org.apache.solr.search.function.FloatFieldSource;
|
||||||
|
import org.apache.solr.search.function.FileFloatSource;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.request.XMLWriter;
|
||||||
|
import org.apache.solr.request.TextResponseWriter;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/** Get values from an external file instead of the index.
|
||||||
|
*
|
||||||
|
* <p/><code>keyField</code> will normally be the unique key field, but it doesn't have to be.
|
||||||
|
* <ul><li> It's OK to have a keyField value that can't be found in the index</li>
|
||||||
|
* <li>It's OK to have some documents without a keyField in the file (defVal is used as the default)</li>
|
||||||
|
* <li>It's OK for a keyField value to point to multiple documents (no uniqueness requirement)</li>
|
||||||
|
* </ul>
|
||||||
|
* <code>valType</code> is a reference to another fieldType to define the value type of this field (must currently be FloatField (float))
|
||||||
|
*
|
||||||
|
* The format of the external file is simply newline separated keyFieldValue=floatValue.
|
||||||
|
* <br/>Example:
|
||||||
|
* <br/><code>doc33=1.414</code>
|
||||||
|
* <br/><code>doc34=3.14159</code>
|
||||||
|
* <br/><code>doc40=42</code>
|
||||||
|
*
|
||||||
|
* <p/>Solr looks for the external file in the index directory under the name of
|
||||||
|
* external_<fieldname> or external_<fieldname>.*
|
||||||
|
*
|
||||||
|
* <p/>If any files of the latter pattern appear, the last (after being sorted by name) will be used and previous versions will be deleted.
|
||||||
|
* This is to help support systems where one may not be able to overwrite a file (like Windows, if the file is in use).
|
||||||
|
* <p/>If the external file has already been loaded, and it is changed, those changes will not be visible until a commit has been done.
|
||||||
|
* <p/>The external file may be sorted or unsorted by the key field, but it will be substantially slower (untested) if it isn't sorted.
|
||||||
|
* <p/>Fields of this type may currently only be used as a ValueSource in a FunctionQuery.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class ExternalFileField extends FieldType {
|
||||||
|
private FieldType ftype;
|
||||||
|
private String keyFieldName;
|
||||||
|
private IndexSchema schema;
|
||||||
|
private float defVal;
|
||||||
|
|
||||||
|
protected void init(IndexSchema schema, Map<String,String> args) {
|
||||||
|
restrictProps(SORT_MISSING_FIRST | SORT_MISSING_LAST);
|
||||||
|
String ftypeS = getArg("valType", args);
|
||||||
|
if (ftypeS!=null) {
|
||||||
|
ftype = schema.getFieldTypes().get(ftypeS);
|
||||||
|
if (ftype==null || !(ftype instanceof FloatField)) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Only float (FloatField) is currently supported as external field type. got " + ftypeS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keyFieldName = args.remove("keyField");
|
||||||
|
String defValS = args.remove("defVal");
|
||||||
|
defVal = defValS==null ? 0 : Float.parseFloat(defValS);
|
||||||
|
this.schema = schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SortField getSortField(SchemaField field,boolean reverse) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public ValueSource getValueSource(SchemaField field, QParser parser) {
|
||||||
|
// default key field to unique key
|
||||||
|
SchemaField keyField = keyFieldName==null ? schema.getUniqueKeyField() : schema.getField(keyFieldName);
|
||||||
|
return new FileFloatSource(field, keyField, defVal, parser);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,417 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.search.function;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.TermDocs;
|
||||||
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Obtains float field values from an external file.
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class FileFloatSource extends ValueSource {
|
||||||
|
private SchemaField field;
|
||||||
|
private final SchemaField keyField;
|
||||||
|
private final float defVal;
|
||||||
|
|
||||||
|
private final String indexDir;
|
||||||
|
|
||||||
|
public FileFloatSource(SchemaField field, SchemaField keyField, float defVal, QParser parser) {
|
||||||
|
this.field = field;
|
||||||
|
this.keyField = keyField;
|
||||||
|
this.defVal = defVal;
|
||||||
|
this.indexDir = parser.getReq().getCore().getIndexDir();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String description() {
|
||||||
|
return "float(" + field + ')';
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocValues getValues(IndexReader reader) throws IOException {
|
||||||
|
final float[] arr = getCachedFloats(reader);
|
||||||
|
return new DocValues() {
|
||||||
|
public float floatVal(int doc) {
|
||||||
|
return arr[doc];
|
||||||
|
}
|
||||||
|
|
||||||
|
public int intVal(int doc) {
|
||||||
|
return (int)arr[doc];
|
||||||
|
}
|
||||||
|
|
||||||
|
public long longVal(int doc) {
|
||||||
|
return (long)arr[doc];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double doubleVal(int doc) {
|
||||||
|
return (double)arr[doc];
|
||||||
|
}
|
||||||
|
|
||||||
|
public String strVal(int doc) {
|
||||||
|
return Float.toString(arr[doc]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(int doc) {
|
||||||
|
return description() + '=' + floatVal(doc);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (o.getClass() != FileFloatSource.class) return false;
|
||||||
|
FileFloatSource other = (FileFloatSource)o;
|
||||||
|
return this.field.getName().equals(other.field.getName())
|
||||||
|
&& this.keyField.getName().equals(other.keyField.getName())
|
||||||
|
&& this.defVal == other.defVal
|
||||||
|
&& this.indexDir.equals(other.indexDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
return FileFloatSource.class.hashCode() + field.getName().hashCode();
|
||||||
|
};
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "FileFloatSource(field="+field.getName()+",keyField="+keyField.getName()
|
||||||
|
+ ",defVal="+defVal+",indexDir="+indexDir+")";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private final float[] getCachedFloats(IndexReader reader) {
|
||||||
|
return (float[])floatCache.get(reader, new Entry(this));
|
||||||
|
}
|
||||||
|
|
||||||
|
static Cache floatCache = new Cache() {
|
||||||
|
protected Object createValue(IndexReader reader, Object key) {
|
||||||
|
return getFloats(((Entry)key).ffs, reader);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Internal cache. (from lucene FieldCache) */
|
||||||
|
abstract static class Cache {
|
||||||
|
private final Map readerCache = new WeakHashMap();
|
||||||
|
|
||||||
|
protected abstract Object createValue(IndexReader reader, Object key);
|
||||||
|
|
||||||
|
public Object get(IndexReader reader, Object key) {
|
||||||
|
Map innerCache;
|
||||||
|
Object value;
|
||||||
|
synchronized (readerCache) {
|
||||||
|
innerCache = (Map) readerCache.get(reader);
|
||||||
|
if (innerCache == null) {
|
||||||
|
innerCache = new HashMap();
|
||||||
|
readerCache.put(reader, innerCache);
|
||||||
|
value = null;
|
||||||
|
} else {
|
||||||
|
value = innerCache.get(key);
|
||||||
|
}
|
||||||
|
if (value == null) {
|
||||||
|
value = new CreationPlaceholder();
|
||||||
|
innerCache.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (value instanceof CreationPlaceholder) {
|
||||||
|
synchronized (value) {
|
||||||
|
CreationPlaceholder progress = (CreationPlaceholder) value;
|
||||||
|
if (progress.value == null) {
|
||||||
|
progress.value = createValue(reader, key);
|
||||||
|
synchronized (readerCache) {
|
||||||
|
innerCache.put(key, progress.value);
|
||||||
|
onlyForTesting = progress.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return progress.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static Object onlyForTesting; // set to the last value
|
||||||
|
|
||||||
|
static final class CreationPlaceholder {
|
||||||
|
Object value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Expert: Every composite-key in the internal cache is of this type. */
|
||||||
|
private static class Entry {
|
||||||
|
final FileFloatSource ffs;
|
||||||
|
public Entry(FileFloatSource ffs) {
|
||||||
|
this.ffs = ffs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof Entry)) return false;
|
||||||
|
Entry other = (Entry)o;
|
||||||
|
return ffs.equals(other.ffs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
return ffs.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static float[] getFloats(FileFloatSource ffs, IndexReader reader) {
|
||||||
|
float[] vals = new float[reader.maxDoc()];
|
||||||
|
if (ffs.defVal != 0) {
|
||||||
|
Arrays.fill(vals, ffs.defVal);
|
||||||
|
}
|
||||||
|
InputStream is;
|
||||||
|
String fname = "external_" + ffs.field.getName();
|
||||||
|
try {
|
||||||
|
is = getLatestFile(ffs.indexDir, fname);
|
||||||
|
} catch (IOException e) {
|
||||||
|
// log, use defaults
|
||||||
|
SolrCore.log.severe("Error opening external value source file: " +e);
|
||||||
|
return vals;
|
||||||
|
}
|
||||||
|
|
||||||
|
BufferedReader r = new BufferedReader(new InputStreamReader(is));
|
||||||
|
|
||||||
|
String idName = ffs.keyField.getName().intern();
|
||||||
|
FieldType idType = ffs.keyField.getType();
|
||||||
|
boolean sorted=true; // assume sorted until we discover it's not
|
||||||
|
|
||||||
|
|
||||||
|
// warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
|
||||||
|
// because of this, simply ask the reader for a new termEnum rather than
|
||||||
|
// trying to use skipTo()
|
||||||
|
|
||||||
|
List<String> notFound = new ArrayList<String>();
|
||||||
|
int notFoundCount=0;
|
||||||
|
int otherErrors=0;
|
||||||
|
|
||||||
|
TermDocs termDocs = null;
|
||||||
|
Term protoTerm = new Term(idName, "");
|
||||||
|
TermEnum termEnum = null;
|
||||||
|
// Number of times to try termEnum.next() before resorting to skip
|
||||||
|
int numTimesNext = 10;
|
||||||
|
|
||||||
|
char delimiter='=';
|
||||||
|
String termVal;
|
||||||
|
boolean hasNext=true;
|
||||||
|
String prevKey="";
|
||||||
|
|
||||||
|
String lastVal="\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF";
|
||||||
|
|
||||||
|
try {
|
||||||
|
termDocs = reader.termDocs();
|
||||||
|
termEnum = reader.terms(protoTerm);
|
||||||
|
Term t = termEnum.term();
|
||||||
|
if (t != null && t.field() == idName) { // intern'd comparison
|
||||||
|
termVal = t.text();
|
||||||
|
} else {
|
||||||
|
termVal = lastVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (String line; (line=r.readLine())!=null;) {
|
||||||
|
int delimIndex = line.indexOf(delimiter);
|
||||||
|
if (delimIndex < 0) continue;
|
||||||
|
|
||||||
|
int endIndex = line.length();
|
||||||
|
/* EOLs should already be removed for BufferedReader.readLine()
|
||||||
|
for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) {
|
||||||
|
char ch = line.charAt(endIndex-1);
|
||||||
|
if (ch!='\n' && ch!='\r') break;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
String key = line.substring(0, delimIndex);
|
||||||
|
String val = line.substring(delimIndex+1, endIndex);
|
||||||
|
|
||||||
|
String internalKey = idType.toInternal(key);
|
||||||
|
float fval;
|
||||||
|
try {
|
||||||
|
fval=Float.parseFloat(val);
|
||||||
|
} catch (Exception e) {
|
||||||
|
if (++otherErrors<=10) {
|
||||||
|
SolrCore.log.severe( "Error loading external value source + fileName + " + e
|
||||||
|
+ (otherErrors<10 ? "" : "\tSkipping future errors for this file.")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
continue; // go to next line in file.. leave values as default.
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sorted) {
|
||||||
|
// make sure this key is greater than the previous key
|
||||||
|
sorted = internalKey.compareTo(prevKey) >= 0;
|
||||||
|
prevKey = internalKey;
|
||||||
|
|
||||||
|
if (sorted) {
|
||||||
|
int countNext = 0;
|
||||||
|
for(;;) {
|
||||||
|
int cmp = internalKey.compareTo(termVal);
|
||||||
|
if (cmp == 0) {
|
||||||
|
termDocs.seek(termEnum);
|
||||||
|
while (termDocs.next()) {
|
||||||
|
vals[termDocs.doc()] = fval;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} else if (cmp < 0) {
|
||||||
|
// term enum has already advanced past current key... we didn't find it.
|
||||||
|
if (notFoundCount<10) { // collect first 10 not found for logging
|
||||||
|
notFound.add(key);
|
||||||
|
}
|
||||||
|
notFoundCount++;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// termEnum is less than our current key, so skip ahead
|
||||||
|
|
||||||
|
// try next() a few times to see if we hit or pass the target.
|
||||||
|
// Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
|
||||||
|
// so the best thing is to simply ask the reader for a new termEnum(target)
|
||||||
|
// if we really need to skip.
|
||||||
|
if (++countNext > numTimesNext) {
|
||||||
|
termEnum = reader.terms(protoTerm.createTerm(internalKey));
|
||||||
|
t = termEnum.term();
|
||||||
|
} else {
|
||||||
|
hasNext = termEnum.next();
|
||||||
|
t = hasNext ? termEnum.term() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t != null && t.field() == idName) { // intern'd comparison
|
||||||
|
termVal = t.text();
|
||||||
|
} else {
|
||||||
|
termVal = lastVal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end for(;;)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sorted) {
|
||||||
|
termEnum = reader.terms(protoTerm.createTerm(internalKey));
|
||||||
|
t = termEnum.term();
|
||||||
|
if (t != null && t.field() == idName // intern'd comparison
|
||||||
|
&& internalKey.equals(t.text()))
|
||||||
|
{
|
||||||
|
termDocs.seek (termEnum);
|
||||||
|
while (termDocs.next()) {
|
||||||
|
vals[termDocs.doc()] = fval;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (notFoundCount<10) { // collect first 10 not found for logging
|
||||||
|
notFound.add(key);
|
||||||
|
}
|
||||||
|
notFoundCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// log, use defaults
|
||||||
|
SolrCore.log.severe("Error loading external value source: " +e);
|
||||||
|
} finally {
|
||||||
|
// swallow exceptions on close so we don't override any
|
||||||
|
// exceptions that happened in the loop
|
||||||
|
if (termDocs!=null) try{termDocs.close();}catch(Exception e){}
|
||||||
|
if (termEnum!=null) try{termEnum.close();}catch(Exception e){}
|
||||||
|
try{r.close();}catch(Exception e){}
|
||||||
|
}
|
||||||
|
|
||||||
|
SolrCore.log.info("Loaded external value source " + fname
|
||||||
|
+ (notFoundCount==0 ? "" : " :"+notFoundCount+" missing keys "+notFound)
|
||||||
|
);
|
||||||
|
|
||||||
|
return vals;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Future: refactor/pull out into VersionedFile class
|
||||||
|
|
||||||
|
/* Open the latest version of a file... fileName if that exists, or
|
||||||
|
* the last fileName.* after being sorted lexicographically.
|
||||||
|
* Older versions of the file are deleted (and queued for deletion if
|
||||||
|
* that fails).
|
||||||
|
*/
|
||||||
|
private static InputStream getLatestFile(String dirName, String fileName) throws FileNotFoundException {
|
||||||
|
Collection<File> oldFiles=null;
|
||||||
|
final String prefix = fileName+'.';
|
||||||
|
File f = new File(dirName, fileName);
|
||||||
|
InputStream is = null;
|
||||||
|
|
||||||
|
// there can be a race between checking for a file and opening it...
|
||||||
|
// the user may have just put a new version in and deleted an old version.
|
||||||
|
// try multiple times in a row.
|
||||||
|
for (int retry=0; retry<10; retry++) {
|
||||||
|
try {
|
||||||
|
if (!f.exists()) {
|
||||||
|
File dir = new File(dirName);
|
||||||
|
String[] names = dir.list(new FilenameFilter() {
|
||||||
|
public boolean accept(File dir, String name) {
|
||||||
|
return name.startsWith(prefix);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Arrays.sort(names);
|
||||||
|
f = new File(dir, names[names.length-1]);
|
||||||
|
oldFiles = new ArrayList<File>();
|
||||||
|
for (int i=0; i<names.length-1; i++) {
|
||||||
|
oldFiles.add(new File(dir, names[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
is = new FileInputStream(f);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// swallow exception for now
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// allow exception to be thrown from the final try.
|
||||||
|
is = new FileInputStream(f);
|
||||||
|
|
||||||
|
// delete old files only after we have successfuly opened the newest
|
||||||
|
if (oldFiles != null) {
|
||||||
|
delete(oldFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
return is;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private static final Set<File> deleteList = new HashSet<File>();
|
||||||
|
private static synchronized void delete(Collection<File> files) {
|
||||||
|
synchronized (deleteList) {
|
||||||
|
deleteList.addAll(files);
|
||||||
|
List<File> deleted = new ArrayList<File>();
|
||||||
|
for (File df : deleteList) {
|
||||||
|
try {
|
||||||
|
df.delete();
|
||||||
|
// deleteList.remove(df);
|
||||||
|
deleted.add(df);
|
||||||
|
} catch (SecurityException e) {
|
||||||
|
if (!df.exists()) {
|
||||||
|
deleted.add(df);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deleteList.removeAll(deleted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -18,9 +18,16 @@
|
||||||
package org.apache.solr.search.function;
|
package org.apache.solr.search.function;
|
||||||
|
|
||||||
import org.apache.solr.util.AbstractSolrTestCase;
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests some basic functionality of Solr while demonstrating good
|
* Tests some basic functionality of Solr while demonstrating good
|
||||||
|
@ -43,12 +50,27 @@ public class TestFunctionQuery extends AbstractSolrTestCase {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String base = "external_foo_extf";
|
||||||
|
void makeExternalFile(String field, String contents, String charset) {
|
||||||
|
String dir = h.getCore().getIndexDir();
|
||||||
|
String filename = dir + "/external_" + field + "." + System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset);
|
||||||
|
out.write(contents);
|
||||||
|
out.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void createIndex(String field, float... values) {
|
void createIndex(String field, float... values) {
|
||||||
// lrf.args.put("version","2.0");
|
// lrf.args.put("version","2.0");
|
||||||
for (float val : values) {
|
for (float val : values) {
|
||||||
String s = Float.toString(val);
|
String s = Float.toString(val);
|
||||||
assertU(adoc("id", s, field, s));
|
if (field!=null) assertU(adoc("id", s, field, s));
|
||||||
System.out.println("added doc for " + val);
|
else assertU(adoc("id", s));
|
||||||
|
// System.out.println("added doc for " + val);
|
||||||
}
|
}
|
||||||
assertU(optimize()); // squeeze out any possible deleted docs
|
assertU(optimize()); // squeeze out any possible deleted docs
|
||||||
}
|
}
|
||||||
|
@ -77,14 +99,14 @@ public class TestFunctionQuery extends AbstractSolrTestCase {
|
||||||
// "//doc[./float[@name='foo_pf']='10.0' and ./float[@name='score']='10.0']"
|
// "//doc[./float[@name='foo_pf']='10.0' and ./float[@name='score']='10.0']"
|
||||||
|
|
||||||
for (int i=0; i<results.length; i+=2) {
|
for (int i=0; i<results.length; i+=2) {
|
||||||
String xpath = "//doc[./float[@name='" + field + "']='"
|
String xpath = "//doc[./float[@name='" + "id" + "']='"
|
||||||
+ results[i] + "' and ./float[@name='score']='"
|
+ results[i] + "' and ./float[@name='score']='"
|
||||||
+ results[i+1] + "']";
|
+ results[i+1] + "']";
|
||||||
tests.add(xpath);
|
tests.add(xpath);
|
||||||
}
|
}
|
||||||
|
|
||||||
assertQ(req("q", parseableQuery
|
assertQ(req("q", parseableQuery
|
||||||
,"fl", "*,score"
|
,"fl", "*,score","indent","on","rows","100"
|
||||||
)
|
)
|
||||||
, tests.toArray(new String[tests.size()])
|
, tests.toArray(new String[tests.size()])
|
||||||
);
|
);
|
||||||
|
@ -137,4 +159,78 @@ public class TestFunctionQuery extends AbstractSolrTestCase {
|
||||||
doTest("foo_pf"); // a plain float field
|
doTest("foo_pf"); // a plain float field
|
||||||
doTest("foo_f"); // a sortable float field
|
doTest("foo_f"); // a sortable float field
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testExternalField() {
|
||||||
|
String field = "foo_extf";
|
||||||
|
|
||||||
|
float[] ids = {100,-4,0,10,25,5,77,23,55,-78,-45,-24,63,78,94,22,34,54321,261,-627};
|
||||||
|
|
||||||
|
createIndex(null,ids);
|
||||||
|
|
||||||
|
// Unsorted field, largest first
|
||||||
|
makeExternalFile(field, "54321=543210\n0=-999\n25=250","UTF-8");
|
||||||
|
// test identity (straight field value)
|
||||||
|
singleTest(field, "\0", 54321, 543210, 0,-999, 25,250, 100, 1);
|
||||||
|
Object orig = FileFloatSource.onlyForTesting;
|
||||||
|
singleTest(field, "log(\0)");
|
||||||
|
// make sure the values were cached
|
||||||
|
assertTrue(orig == FileFloatSource.onlyForTesting);
|
||||||
|
singleTest(field, "sqrt(\0)");
|
||||||
|
assertTrue(orig == FileFloatSource.onlyForTesting);
|
||||||
|
|
||||||
|
makeExternalFile(field, "0=1","UTF-8");
|
||||||
|
assertU(commit());
|
||||||
|
assertTrue(orig != FileFloatSource.onlyForTesting);
|
||||||
|
|
||||||
|
|
||||||
|
Random r = new Random();
|
||||||
|
for (int i=0; i<10; i++) { // do more iterations for a thorough test
|
||||||
|
int len = r.nextInt(ids.length+1);
|
||||||
|
boolean sorted = r.nextBoolean();
|
||||||
|
// shuffle ids
|
||||||
|
for (int j=0; j<ids.length; j++) {
|
||||||
|
int other=r.nextInt(ids.length);
|
||||||
|
float v=ids[0];
|
||||||
|
ids[0] = ids[other];
|
||||||
|
ids[other] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sorted) {
|
||||||
|
// sort only the first elements
|
||||||
|
Arrays.sort(ids,0,len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// make random values
|
||||||
|
float[] vals = new float[len];
|
||||||
|
for (int j=0; j<len; j++) {
|
||||||
|
vals[j] = r.nextInt(200)-100;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make and write the external file
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int j=0; j<len; j++) {
|
||||||
|
sb.append("" + ids[j] + "=" + vals[j]+"\n");
|
||||||
|
}
|
||||||
|
makeExternalFile(field, sb.toString(),"UTF-8");
|
||||||
|
|
||||||
|
// make it visible
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
// test it
|
||||||
|
float[] answers = new float[ids.length*2];
|
||||||
|
for (int j=0; j<len; j++) {
|
||||||
|
answers[j*2] = ids[j];
|
||||||
|
answers[j*2+1] = vals[j];
|
||||||
|
}
|
||||||
|
for (int j=len; j<ids.length; j++) {
|
||||||
|
answers[j*2] = ids[j];
|
||||||
|
answers[j*2+1] = 1; // the default values
|
||||||
|
}
|
||||||
|
|
||||||
|
singleTest(field, "\0", answers);
|
||||||
|
System.out.println("Done test "+i);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -232,7 +232,10 @@
|
||||||
<!-- since fields of this type are by default not stored or indexed, any data added to
|
<!-- since fields of this type are by default not stored or indexed, any data added to
|
||||||
them will be ignored outright
|
them will be ignored outright
|
||||||
-->
|
-->
|
||||||
<fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" />
|
<fieldType name="ignored" stored="false" indexed="false" class="solr.StrField" />
|
||||||
|
|
||||||
|
<fieldType name="file" keyField="id" defVal="1" stored="false" indexed="false" class="solr.ExternalFileField" valType="float"/>
|
||||||
|
|
||||||
|
|
||||||
</types>
|
</types>
|
||||||
|
|
||||||
|
@ -282,6 +285,8 @@
|
||||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<dynamicField name="*_extf" type="file"/>
|
||||||
|
|
||||||
<dynamicField name="*_random" type="random" />
|
<dynamicField name="*_random" type="random" />
|
||||||
|
|
||||||
<!-- uncomment the following to ignore any fields that don't already match an existing
|
<!-- uncomment the following to ignore any fields that don't already match an existing
|
||||||
|
|
Loading…
Reference in New Issue