LUCENE-2858: Fix remaining TODO: re-add FieldCache insanity checking, which was dropped earlier because it was tricky to implement

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1238112 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2012-01-31 00:33:19 +00:00
parent 59c8c9e67e
commit 1648670485
6 changed files with 146 additions and 10 deletions

View File

@@ -24,6 +24,8 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.CacheEntry;
@@ -146,6 +148,9 @@ public final class FieldCacheSanityChecker {
insanity.addAll(checkValueMismatch(valIdToItems,
readerFieldToValIds,
valMismatchKeys));
insanity.addAll(checkSubreaders(valIdToItems,
readerFieldToValIds));
return insanity.toArray(new Insanity[insanity.size()]);
}
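For orientation, a hedged sketch of how this entry point is typically driven; FieldCache.DEFAULT, getCacheEntries(), and the checkSanity(CacheEntry...) overload are the same APIs testInsanity2 below exercises:

// Sketch only: run the sanity passes, now including the re-added
// subreader check, over whatever the default cache currently holds.
FieldCache cache = FieldCache.DEFAULT;
FieldCacheSanityChecker.Insanity[] problems =
    FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
for (FieldCacheSanityChecker.Insanity i : problems) {
  System.err.println(i); // each Insanity carries a type, a message, and the offending CacheEntry[]
}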
@@ -186,6 +191,107 @@ public final class FieldCacheSanityChecker {
return insanity;
}
/**
* Internal helper method used by checkSanity that iterates over
* the keys of readerFieldToValIds and generates a Collection
* of Insanity instances whenever two (or more) ReaderField instances are
* found that have an ancestry relationship.
*
* @see InsanityType#SUBREADER
*/
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
final List<Insanity> insanity = new ArrayList<Insanity>(23);
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<ReaderField, Set<ReaderField>>(17);
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<ReaderField, ReaderField>(badChildren); // wrapper
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
Set<ReaderField> seen = new HashSet<ReaderField>(17);
Set<ReaderField> readerFields = rfToValIdSets.keySet();
for (final ReaderField rf : readerFields) {
if (seen.contains(rf)) continue;
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
for (Object kidKey : kids) {
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
if (badChildren.containsKey(kid)) {
// we've already processed this kid as a ReaderField and found other problems
// track those problems as our own
badKids.put(rf, kid);
badKids.putAll(rf, badChildren.get(kid));
badChildren.remove(kid);
} else if (rfToValIdSets.containsKey(kid)) {
// we have cache entries for the kid
badKids.put(rf, kid);
}
seen.add(kid);
}
seen.add(rf);
}
// every mapping in badKids represents an Insanity
for (final ReaderField parent : badChildren.keySet()) {
Set<ReaderField> kids = badChildren.get(parent);
List<CacheEntry> badEntries = new ArrayList<CacheEntry>(kids.size() * 2);
// put the parent's entries in first
{
for (final Integer value : rfToValIdSets.get(parent)) {
badEntries.addAll(viToItemSets.get(value));
}
}
// now the entries for the descendants
for (final ReaderField kid : kids) {
for (final Integer value : rfToValIdSets.get(kid)) {
badEntries.addAll(viToItemSets.get(value));
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.SUBREADER,
"Found caches for descendants of " +
parent.toString(),
badness));
}
return insanity;
}
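To make the new pass concrete, a hedged sketch of the situation it flags, mirroring testInsanity2 below. rA and rB are placeholder composite readers over two separate indexes, and the sketch assumes (as the purgeFieldCache hack below also relies on) that SlowCompositeReaderWrapper delegates its core cache key to the reader it wraps:

// Sketch, not part of this commit. FieldCache methods take atomic
// readers, so the composites are viewed through SlowCompositeReaderWrapper.
AtomicReader leafA = SlowCompositeReaderWrapper.wrap(rA);
AtomicReader leafB = SlowCompositeReaderWrapper.wrap(rB);
AtomicReader both  = SlowCompositeReaderWrapper.wrap(new MultiReader(rA, rB));

FieldCache cache = FieldCache.DEFAULT;
cache.getTerms(leafA, "theString"); // entry keyed on rA's core key
cache.getTerms(leafB, "theString"); // entry keyed on rB's core key
cache.getTerms(both, "theString");  // same field cached again on an ancestor

// checkSubreaders starts from the ancestor's reader key, collects the
// descendant keys (rA, rB), finds cache entries for the same field on
// both levels, and reports one InsanityType.SUBREADER spanning all three.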
/**
* Checks if the seed is a CompositeReader, and if so walks
* the hierarchy of subReaders, building up a list of the objects
* returned by getCoreCacheKey() on each descendant
*/
private List<Object> getAllDescendantReaderKeys(Object seed) {
List<Object> all = new ArrayList<Object>(17); // will grow as we iterate
all.add(seed);
for (int i = 0; i < all.size(); i++) {
Object obj = all.get(i);
if (obj instanceof CompositeReader) {
IndexReader[] subs = ((CompositeReader)obj).getSequentialSubReaders();
for (int j = 0; (null != subs) && (j < subs.length); j++) {
all.add(subs[j].getCoreCacheKey());
}
}
}
// need to skip the first, because it was the seed
return all.subList(1, all.size());
}
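A hedged trace of this walk for the rA/rB sketch above (illustrative reasoning only; the method is private):

// seed = the MultiReader's cache key, i.e. the MultiReader itself,
//        since composite readers are assumed not to override getCoreCacheKey()
// i=0:  the MultiReader is a CompositeReader -> append key(rA), key(rB)
// i=1+: rA and rB are composites too, so their keys are the readers
//       themselves -> append their SegmentReaders' core cache keys
// then: segment core keys are opaque objects, not readers -> nothing more added
// finally: return all.subList(1, all.size()), i.e. everything but the seed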
/**
* Simple pair object for using "readerKey + fieldName" as a Map key
*/

View File

@@ -116,7 +116,7 @@ public class CheckHits {
Assert.assertEquals("Wrap Reader " + i + ": " +
query.toString(defaultFieldName),
correct, actual);
// TODO: FieldCache.DEFAULT.purge(s.getIndexReader()); // our wrapping can create insanity otherwise
QueryUtils.purgeFieldCache(s.getIndexReader()); // our wrapping can create insanity otherwise
}
}

View File

@@ -31,6 +31,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
@@ -116,14 +117,11 @@ public class QueryUtils {
if (wrap) {
IndexSearcher wrapped;
check(random, q1, wrapped = wrapUnderlyingReader(random, s, -1), false);
// TODO: I removed that as we can never get insanity by composite readers anymore... Is this ok?
//FieldCache.DEFAULT.purge(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
check(random, q1, wrapped = wrapUnderlyingReader(random, s, 0), false);
// TODO: I removed that as we can never get insanity by composite readers anymore... Is this ok?
//FieldCache.DEFAULT.purge(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
check(random, q1, wrapped = wrapUnderlyingReader(random, s, +1), false);
// TODO: I removed that as we can never get insanity by composite readers anymore... Is this ok?
//FieldCache.DEFAULT.purge(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
}
checkExplanations(q1,s);
@@ -136,6 +134,11 @@ public class QueryUtils {
}
}
public static void purgeFieldCache(IndexReader r) throws IOException {
// this is just a hack to get an atomic reader that contains all subreaders, for the insanity checks
FieldCache.DEFAULT.purge(SlowCompositeReaderWrapper.wrap(r));
}
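A hedged note on why this works (my reading of SlowCompositeReaderWrapper, not something this diff states): wrap() returns atomic readers unchanged, and the wrapper delegates getCoreCacheKey() to the reader it wraps, so the purge removes entries under the same key the original reader's caches live under, whichever kind of reader a test hands us. multiReader and segmentReader below are placeholders:

// composite: wrapped into an atomic view, purged under the composite's own key
QueryUtils.purgeFieldCache(multiReader);
// already atomic: wrap() is assumed to be a no-op, so it is purged directly
QueryUtils.purgeFieldCache(segmentReader);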
/**
* Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
* is a MultiReader containing the Reader of the original IndexSearcher,

View File

@@ -138,4 +138,31 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
cache.purgeAllCaches();
}
public void testInsanity2() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getTerms(readerA, "theString");
cache.getTerms(readerB, "theString");
cache.getTerms(readerX, "theString");
cache.getBytes(readerX, "theByte", false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
assertEquals("wrong number of cache errors", 1, insanity.length);
assertEquals("wrong type of cache error",
InsanityType.SUBREADER,
insanity[0].getType());
assertEquals("wrong number of entries in cache error", 3,
insanity[0].getCacheEntries().length);
// we expect bad things, don't let tearDown complain about them
cache.purgeAllCaches();
}
}

View File

@@ -366,7 +366,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
}
}
} finally {
// TODO: FieldCache.DEFAULT.purge(r);
QueryUtils.purgeFieldCache(r);
}
r.close();

View File

@@ -1140,9 +1140,9 @@ public class TestGrouping extends LuceneTestCase {
assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
}
} finally {
// TODO: FieldCache.DEFAULT.purge(r);
QueryUtils.purgeFieldCache(r);
if (rBlocks != null) {
// TODO: FieldCache.DEFAULT.purge(rBlocks);
QueryUtils.purgeFieldCache(rBlocks);
}
}