SOLR-1900: optimize FileFloatSource for flex

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@984219 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-08-10 21:36:19 +00:00
parent 006bd17be2
commit ca4c8f3ae9
1 changed file with 14 additions and 73 deletions

View File

@ -211,8 +211,6 @@ public class FileFloatSource extends ValueSource {
String idName = StringHelper.intern(ffs.keyField.getName()); String idName = StringHelper.intern(ffs.keyField.getName());
FieldType idType = ffs.keyField.getType(); FieldType idType = ffs.keyField.getType();
boolean sorted=true; // assume sorted until we discover it's not
// warning: lucene's termEnum.skipTo() is not optimized... it simply does a next() // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
// because of this, simply ask the reader for a new termEnum rather than // because of this, simply ask the reader for a new termEnum rather than
@ -222,38 +220,25 @@ public class FileFloatSource extends ValueSource {
int notFoundCount=0; int notFoundCount=0;
int otherErrors=0; int otherErrors=0;
// Number of times to try termEnum.next() before resorting to skip
int numTimesNext = 10;
char delimiter='='; char delimiter='=';
BytesRef lastVal=new BytesRef("\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF");
BytesRef internalKey = new BytesRef(); BytesRef internalKey = new BytesRef();
BytesRef prevKey=new BytesRef();
BytesRef tmp;
try { try {
TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator(); TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator();
DocsEnum docsEnum = null; DocsEnum docsEnum = null;
BytesRef t = termsEnum.next();
if (t==null) t=lastVal; // removing deleted docs shouldn't matter
final Bits delDocs = MultiFields.getDeletedDocs(reader); // final Bits delDocs = MultiFields.getDeletedDocs(reader);
for (String line; (line=r.readLine())!=null;) { for (String line; (line=r.readLine())!=null;) {
int delimIndex = line.indexOf(delimiter); int delimIndex = line.indexOf(delimiter);
if (delimIndex < 0) continue; if (delimIndex < 0) continue;
int endIndex = line.length(); int endIndex = line.length();
/* EOLs should already be removed for BufferedReader.readLine()
for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) {
char ch = line.charAt(endIndex-1);
if (ch!='\n' && ch!='\r') break;
}
*/
String key = line.substring(0, delimIndex); String key = line.substring(0, delimIndex);
String val = line.substring(delimIndex+1, endIndex); String val = line.substring(delimIndex+1, endIndex);
tmp = prevKey; prevKey=internalKey; internalKey=tmp;
idType.readableToIndexed(key, internalKey); idType.readableToIndexed(key, internalKey);
float fval; float fval;
@ -268,65 +253,21 @@ public class FileFloatSource extends ValueSource {
continue; // go to next line in file.. leave values as default. continue; // go to next line in file.. leave values as default.
} }
if (sorted) { if (termsEnum.seek(internalKey, false) != TermsEnum.SeekStatus.FOUND) {
// make sure this key is greater than the previous key if (notFoundCount<10) { // collect first 10 not found for logging
sorted = internalKey.compareTo(prevKey) >= 0; notFound.add(key);
if (sorted) {
int countNext = 0;
for(;;) {
int cmp = internalKey.compareTo(t);
if (cmp == 0) {
docsEnum = termsEnum.docs(delDocs, docsEnum);
int doc;
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
vals[doc] = fval;
}
break;
} else if (cmp < 0) {
// term enum has already advanced past current key... we didn't find it.
if (notFoundCount<10) { // collect first 10 not found for logging
notFound.add(key);
}
notFoundCount++;
break;
} else {
// termEnum is less than our current key, so skip ahead
// try next() a few times to see if we hit or pass the target.
// Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
// so the best thing is to simply ask the reader for a new termEnum(target)
// if we really need to skip.
if (++countNext > numTimesNext) {
termsEnum.seek(internalKey);
t = termsEnum.term();
} else {
t = termsEnum.next();
}
if (t==null) t = lastVal;
}
} // end for(;;)
} }
notFoundCount++;
continue;
} }
if (!sorted) { docsEnum = termsEnum.docs(null, docsEnum);
TermsEnum.SeekStatus result = termsEnum.seek(internalKey); int doc;
t = termsEnum.term(); while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
if (result == TermsEnum.SeekStatus.FOUND) { vals[doc] = fval;
docsEnum = termsEnum.docs(delDocs, docsEnum);
int doc;
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
vals[doc] = fval;
}
} else {
if (notFoundCount<10) { // collect first 10 not found for logging
notFound.add(key);
}
notFoundCount++;
}
} }
} }
} catch (IOException e) { } catch (IOException e) {
// log, use defaults // log, use defaults
SolrCore.log.error("Error loading external value source: " +e); SolrCore.log.error("Error loading external value source: " +e);