mirror of https://github.com/apache/lucene.git
SOLR-1900: optimize FileFloatSource for flex
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@984219 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
006bd17be2
commit
ca4c8f3ae9
|
@ -211,8 +211,6 @@ public class FileFloatSource extends ValueSource {
|
|||
|
||||
String idName = StringHelper.intern(ffs.keyField.getName());
|
||||
FieldType idType = ffs.keyField.getType();
|
||||
boolean sorted=true; // assume sorted until we discover it's not
|
||||
|
||||
|
||||
// warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
|
||||
// because of this, simply ask the reader for a new termEnum rather than
|
||||
|
@ -222,38 +220,25 @@ public class FileFloatSource extends ValueSource {
|
|||
int notFoundCount=0;
|
||||
int otherErrors=0;
|
||||
|
||||
// Number of times to try termEnum.next() before resorting to skip
|
||||
int numTimesNext = 10;
|
||||
|
||||
char delimiter='=';
|
||||
|
||||
BytesRef lastVal=new BytesRef("\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF");
|
||||
BytesRef internalKey = new BytesRef();
|
||||
BytesRef prevKey=new BytesRef();
|
||||
BytesRef tmp;
|
||||
|
||||
try {
|
||||
TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator();
|
||||
DocsEnum docsEnum = null;
|
||||
BytesRef t = termsEnum.next();
|
||||
if (t==null) t=lastVal;
|
||||
final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||
|
||||
// removing deleted docs shouldn't matter
|
||||
// final Bits delDocs = MultiFields.getDeletedDocs(reader);
|
||||
|
||||
for (String line; (line=r.readLine())!=null;) {
|
||||
int delimIndex = line.indexOf(delimiter);
|
||||
if (delimIndex < 0) continue;
|
||||
|
||||
int endIndex = line.length();
|
||||
/* EOLs should already be removed by BufferedReader.readLine()
|
||||
for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) {
|
||||
char ch = line.charAt(endIndex-1);
|
||||
if (ch!='\n' && ch!='\r') break;
|
||||
}
|
||||
*/
|
||||
String key = line.substring(0, delimIndex);
|
||||
String val = line.substring(delimIndex+1, endIndex);
|
||||
|
||||
tmp = prevKey; prevKey=internalKey; internalKey=tmp;
|
||||
idType.readableToIndexed(key, internalKey);
|
||||
|
||||
float fval;
|
||||
|
@ -268,65 +253,21 @@ public class FileFloatSource extends ValueSource {
|
|||
continue; // go to next line in file.. leave values as default.
|
||||
}
|
||||
|
||||
if (sorted) {
|
||||
// make sure this key is greater than the previous key
|
||||
sorted = internalKey.compareTo(prevKey) >= 0;
|
||||
|
||||
if (sorted) {
|
||||
int countNext = 0;
|
||||
for(;;) {
|
||||
int cmp = internalKey.compareTo(t);
|
||||
if (cmp == 0) {
|
||||
docsEnum = termsEnum.docs(delDocs, docsEnum);
|
||||
int doc;
|
||||
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
|
||||
vals[doc] = fval;
|
||||
}
|
||||
break;
|
||||
} else if (cmp < 0) {
|
||||
// term enum has already advanced past current key... we didn't find it.
|
||||
if (termsEnum.seek(internalKey, false) != TermsEnum.SeekStatus.FOUND) {
|
||||
if (notFoundCount<10) { // collect first 10 not found for logging
|
||||
notFound.add(key);
|
||||
}
|
||||
notFoundCount++;
|
||||
break;
|
||||
} else {
|
||||
// termEnum is less than our current key, so skip ahead
|
||||
|
||||
// try next() a few times to see if we hit or pass the target.
|
||||
// Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
|
||||
// so the best thing is to simply ask the reader for a new termEnum(target)
|
||||
// if we really need to skip.
|
||||
if (++countNext > numTimesNext) {
|
||||
termsEnum.seek(internalKey);
|
||||
t = termsEnum.term();
|
||||
} else {
|
||||
t = termsEnum.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (t==null) t = lastVal;
|
||||
}
|
||||
} // end for(;;)
|
||||
}
|
||||
}
|
||||
|
||||
if (!sorted) {
|
||||
TermsEnum.SeekStatus result = termsEnum.seek(internalKey);
|
||||
t = termsEnum.term();
|
||||
if (result == TermsEnum.SeekStatus.FOUND) {
|
||||
docsEnum = termsEnum.docs(delDocs, docsEnum);
|
||||
docsEnum = termsEnum.docs(null, docsEnum);
|
||||
int doc;
|
||||
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
|
||||
vals[doc] = fval;
|
||||
}
|
||||
} else {
|
||||
if (notFoundCount<10) { // collect first 10 not found for logging
|
||||
notFound.add(key);
|
||||
}
|
||||
notFoundCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
// log, use defaults
|
||||
SolrCore.log.error("Error loading external value source: " +e);
|
||||
|
|
Loading…
Reference in New Issue