SOLR-1900: optimize FileFloatSource for flex

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@984219 13f79535-47bb-0310-9956-ffa450edef68
2010-08-10 21:36:19 +00:00 · 2010-08-10 21:36:19 +00:00 · ca4c8f3ae9
parent 006bd17be2
commit ca4c8f3ae9
1 changed file with 14 additions and 73 deletions
--- a/solr/src/java/org/apache/solr/search/function/FileFloatSource.java
+++ b/solr/src/java/org/apache/solr/search/function/FileFloatSource.java
@@ -211,8 +211,6 @@ public class FileFloatSource extends ValueSource {
    String idName = StringHelper.intern(ffs.keyField.getName());
    FieldType idType = ffs.keyField.getType();
    boolean sorted=true;   // assume sorted until we discover it's not
    // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
    // because of this, simply ask the reader for a new termEnum rather than
@@ -222,38 +220,25 @@ public class FileFloatSource extends ValueSource {
    int notFoundCount=0;
    int otherErrors=0;
    // Number of times to try termEnum.next() before resorting to skip
    int numTimesNext = 10;
    char delimiter='=';
    BytesRef lastVal=new BytesRef("\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF");
    BytesRef internalKey = new BytesRef();
    BytesRef prevKey=new BytesRef();
    BytesRef tmp;
    try {
      TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator();
      DocsEnum docsEnum = null;
-      BytesRef t = termsEnum.next();
+
-      if (t==null) t=lastVal;
+      // removing deleted docs shouldn't matter
-      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      // final Bits delDocs = MultiFields.getDeletedDocs(reader);
      for (String line; (line=r.readLine())!=null;) {
        int delimIndex = line.indexOf(delimiter);
        if (delimIndex < 0) continue;
        int endIndex = line.length();
        /* EOLs should already be removed for BufferedReader.readLine()
        for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) {
          char ch = line.charAt(endIndex-1);
          if (ch!='\n' && ch!='\r') break;
        }
        */
        String key = line.substring(0, delimIndex);
        String val = line.substring(delimIndex+1, endIndex);
        tmp = prevKey; prevKey=internalKey; internalKey=tmp;
        idType.readableToIndexed(key, internalKey);
        float fval;
@@ -268,65 +253,21 @@ public class FileFloatSource extends ValueSource {
          continue;  // go to next line in file.. leave values as default.
        }
-        if (sorted) {
+        if (termsEnum.seek(internalKey, false) != TermsEnum.SeekStatus.FOUND) {
-          // make sure this key is greater than the previous key
+          if (notFoundCount<10) {  // collect first 10 not found for logging
-          sorted = internalKey.compareTo(prevKey) >= 0;
+            notFound.add(key);
          if (sorted) {
            int countNext = 0;
            for(;;) {
              int cmp = internalKey.compareTo(t);
              if (cmp == 0) {
                docsEnum = termsEnum.docs(delDocs, docsEnum);
                int doc;
                while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                  vals[doc] = fval;
                }
                break;
              } else if (cmp < 0) {
                // term enum has already advanced past current key... we didn't find it.
                if (notFoundCount<10) {  // collect first 10 not found for logging
                  notFound.add(key);
                }
                notFoundCount++;
                break;
              } else {
                // termEnum is less than our current key, so skip ahead
                // try next() a few times to see if we hit or pass the target.
                // Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
                // so the best thing is to simply ask the reader for a new termEnum(target)
                // if we really need to skip.
                if (++countNext > numTimesNext) {
                  termsEnum.seek(internalKey);
                  t = termsEnum.term();
                } else {
                  t = termsEnum.next();
                }
                if (t==null) t = lastVal;
              }
            } // end for(;;)
          }
          notFoundCount++;
          continue;
        }
-        if (!sorted) {
+        docsEnum = termsEnum.docs(null, docsEnum);
-          TermsEnum.SeekStatus result = termsEnum.seek(internalKey);
+        int doc;
-          t = termsEnum.term();
+        while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
-          if (result == TermsEnum.SeekStatus.FOUND) {
+          vals[doc] = fval;
            docsEnum = termsEnum.docs(delDocs, docsEnum);
            int doc;
            while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
              vals[doc] = fval;
            }
          } else {
            if (notFoundCount<10) {  // collect first 10 not found for logging
              notFound.add(key);
            }
            notFoundCount++;
          }
        }
      }
    } catch (IOException e) {
      // log, use defaults
      SolrCore.log.error("Error loading external value source: " +e);