- LUCENE-853: Filter caching on the remote side of the RMI connection.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@526842 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2007-04-09 17:13:44 +00:00
parent 76dffdd6e0
commit 48e662cf52
7 changed files with 536 additions and 2 deletions

View File

@@ -109,10 +109,14 @@ New features
(Michael Busch)
4. LUCENE-834: Added BoostingTermQuery which can boost scores based on the values of a payload (see #3 above.) (Grant Ingersoll)
5. LUCENE-834: Similarity has a new method for scoring payloads called scorePayloads that can be overridden to take advantage
of payload storage (see #3 above)
6. LUCENE-834: Added isPayloadAvailable() onto TermPositions interface and implemented it in the appropriate places (Grant Ingersoll)
7. LUCENE-853: Added RemoteCachingWrapperFilter to enable caching of Filters on the remote side of the RMI connection.
(Matt Ericson via Otis Gospodnetic)
Optimizations

View File

@@ -30,13 +30,13 @@ import java.io.IOException;
* caching, keeping the two concerns decoupled yet composable.
*/
public class CachingWrapperFilter extends Filter {
private Filter filter;
protected Filter filter;
/**
* @todo What about serialization in RemoteSearchable? Caching won't work.
* Should transient be removed?
*/
private transient Map cache;
protected transient Map cache;
/**
* @param filter Filter to cache results of
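The switch from private to protected above is what lets subclasses reach the wrapped filter and its cache; the CachingWrapperFilterHelper added later in this commit depends on exactly that. A minimal sketch of such a subclass (the class name and method are hypothetical, not part of this commit):

public class InspectableCachingWrapperFilter extends CachingWrapperFilter {
    public InspectableCachingWrapperFilter(Filter filter) {
        super(filter);
    }
    /** Returns how many readers currently have cached bits; 'cache' is now visible here. */
    public int cachedReaderCount() {
        if (cache == null) {
            return 0; // no bits() call has populated the cache yet
        }
        synchronized (cache) { // same locking convention as the parent class
            return cache.size();
        }
    }
}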

View File

@@ -0,0 +1,205 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
/**
* Filter caching singleton. It can be used by {@link org.apache.lucene.search.RemoteCachingWrapperFilter}
* or just to save filters locally for reuse.
* This class makes it possible to cache Filters even when using RMI, as it
* keeps the cache on the searcher side of the RMI connection.
*
* Also could be used as a persistent storage for any filter as long as the
* filter provides a proper hashCode(), as that is used as the key in the cache.
*
* The cache is periodically cleaned up from a separate thread to ensure the
* cache doesn't exceed the maximum size.
* @author Matt Ericson
*/
public class FilterManager {
protected static FilterManager manager;
/** The default maximum number of Filters in the cache */
protected static final int DEFAULT_CACHE_CLEAN_SIZE = 100;
/** The default frequency of cache cleanup */
protected static final long DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10;
/** The cache itself */
protected Map cache;
/** Maximum allowed cache size */
protected int cacheCleanSize;
/** Cache cleaning frequency */
protected long cleanSleepTime;
/** Cache cleaner that runs in a separate thread */
protected FilterCleaner filterCleaner;
public synchronized static FilterManager getInstance() {
if (manager == null) {
manager = new FilterManager();
}
return manager;
}
/**
* Sets up the FilterManager singleton.
*/
protected FilterManager() {
cache = new HashMap();
cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items
cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings
filterCleaner = new FilterCleaner();
Thread fcThread = new Thread(filterCleaner);
// set to be a daemon so it doesn't have to be stopped explicitly
fcThread.setDaemon(true);
fcThread.start();
}
/**
* Sets the maximum size the cache may reach before it is cleaned up
* @param cacheCleanSize maximum allowed cache size
*/
public void setCacheSize(int cacheCleanSize) {
this.cacheCleanSize = cacheCleanSize;
}
/**
* Sets the cache cleaning frequency in milliseconds.
* @param cleanSleepTime cleaning frequency in milliseconds
*/
public void setCleanThreadSleepTime(long cleanSleepTime) {
this.cleanSleepTime = cleanSleepTime;
}
/**
* Returns the cached version of the filter. Allows the caller to pass up
* a small (e.g. freshly deserialized) filter while this keeps a persistent
* version around, so the caching filter can do its job.
*
* @param filter The input filter
* @return The cached version of the filter
*/
public Filter getFilter(Filter filter) {
synchronized(cache) {
FilterItem fi = (FilterItem) cache.get(new Integer(filter.hashCode()));
if (fi != null) {
fi.timestamp = new Date().getTime();
return fi.filter;
}
cache.put(new Integer(filter.hashCode()), new FilterItem(filter));
return filter;
}
}
/**
* Holds the filter and the last time the filter was used, to make LRU-based
* cache cleaning possible.
* TODO: Clean this up when we switch to Java 1.5
*/
protected class FilterItem {
public Filter filter;
public long timestamp;
public FilterItem (Filter filter) {
this.filter = filter;
this.timestamp = new Date().getTime();
}
}
/**
* Keeps the cache from getting too big.
* If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread
* to clean out the cache.
*
* The SortedSet sortedFilterItems is used only to sort the items from the cache,
* so when it's time to clean up we have the TreeSet sort the FilterItems by
* timestamp.
*
* Removes 1.5 * the number of items over the limit, to make the cache smaller.
* For example:
* If the cache clean size is 10 and the cache is at 15, we remove (int) ((15 - 10) * 1.5) = 7 items (the cast truncates 7.5).
* This way we clean the cache a bit more than strictly needed, so the cache cleaner doesn't have to run as frequently.
*/
protected class FilterCleaner implements Runnable {
private boolean running = true;
private TreeSet sortedFilterItems;
public FilterCleaner() {
sortedFilterItems = new TreeSet(new Comparator() {
public int compare(Object a, Object b) {
if (a instanceof Map.Entry && b instanceof Map.Entry) {
FilterItem fia = (FilterItem) ((Map.Entry)a).getValue();
FilterItem fib = (FilterItem) ((Map.Entry)b).getValue();
if ( fia.timestamp == fib.timestamp ) {
return 0;
}
// smaller timestamp first
if ( fia.timestamp < fib.timestamp ) {
return -1;
}
// larger timestamp last
return 1;
} else {
throw new ClassCastException("Objects are not Map.Entry");
}
}
});
}
public void run () {
while (running) {
// sort items from oldest to newest
// we delete the oldest filters
if (cache.size() > cacheCleanSize) {
// empty the temporary set
sortedFilterItems.clear();
synchronized (cache) {
sortedFilterItems.addAll(cache.entrySet());
Iterator it = sortedFilterItems.iterator();
int numToDelete = (int) ((cache.size() - cacheCleanSize) * 1.5);
int counter = 0;
// loop over the set and delete all of the cache entries not used in a while
while (it.hasNext() && counter++ < numToDelete) {
Map.Entry entry = (Map.Entry)it.next();
cache.remove(entry.getKey());
}
}
// empty the set so we don't tie up the memory
sortedFilterItems.clear();
}
// take a nap
try {
Thread.sleep(cleanSleepTime);
} catch (InterruptedException e) {
// just keep going
}
}
}
}
}
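
A minimal usage sketch for the singleton above; 'termFilter' is a hypothetical pre-built Filter, and the tuning values are only examples:

// Obtain the searcher-side singleton and (optionally) tune it.
FilterManager manager = FilterManager.getInstance();
manager.setCacheSize(200);                      // clean once the cache passes ~200 filters
manager.setCleanThreadSleepTime(5 * 60 * 1000); // wake the cleaner every 5 minutes

// The first call caches termFilter; later calls with an equal filter
// (same hashCode()) return the cached instance rather than the new one.
Filter cached = manager.getFilter(termFilter);

Because the cache key is the filter's hashCode(), two structurally equal filters share a single cache entry even when they arrive as separate deserialized objects over RMI.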

View File

@@ -0,0 +1,58 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
/**
* Provides caching of {@link Filter}s themselves on the remote end of an RMI connection.
* The cache is keyed on Filter's hashCode(), so if it sees the same filter twice
* it will reuse the original version.
* <p/>
* NOTE: This does NOT cache the Filter bits, but rather the Filter itself.
* Thus, this works hand-in-hand with {@link CachingWrapperFilter} to keep both
* the Filter cache and the Filter bits on the remote end, close to the searcher.
* <p/>
* Usage:
* <p/>
* To cache a result, you must do something like:
* RemoteCachingWrapperFilter f = new RemoteCachingWrapperFilter(new CachingWrapperFilter(myFilter));
* <p/>
* @author Matt Ericson
*/
public class RemoteCachingWrapperFilter extends Filter {
protected Filter filter;
public RemoteCachingWrapperFilter(Filter filter) {
this.filter = filter;
}
/**
* Uses the {@link FilterManager} to keep the cache for a filter on the
* searcher side of a remote connection.
* @param reader the index reader for the Filter
* @return the bitset
*/
public BitSet bits(IndexReader reader) throws IOException {
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
return cachedFilter.bits(reader);
}
}
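
A usage sketch tying the pieces together, as the javadoc above suggests; the query, filter, and 'searcher' (a Searcher backed by a RemoteSearchable) are hypothetical examples:

// Client side: build the filter chain once and reuse it across searches.
Filter base = new QueryFilter(new TermQuery(new Term("type", "a")));
Filter remoteCached = new RemoteCachingWrapperFilter(new CachingWrapperFilter(base));

// Each search sends the small wrapper over RMI; on the server, bits(reader)
// swaps in the cached Filter via FilterManager, and CachingWrapperFilter
// keeps the computed BitSet next to the IndexReader.
Hits hits = searcher.search(new TermQuery(new Term("test", "test")), remoteCached);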

View File

@@ -0,0 +1,80 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import java.util.WeakHashMap;
import junit.framework.TestCase;
import org.apache.lucene.index.IndexReader;
/**
* A unit test helper class to test when the filter is getting cached and when it is not.
*/
public class CachingWrapperFilterHelper extends CachingWrapperFilter {
private boolean shouldHaveCache = false;
/**
* @param filter Filter to cache results of
*/
public CachingWrapperFilterHelper(Filter filter) {
super(filter);
}
public void setShouldHaveCache(boolean shouldHaveCache) {
this.shouldHaveCache = shouldHaveCache;
}
public BitSet bits(IndexReader reader) throws IOException {
if (cache == null) {
cache = new WeakHashMap();
}
synchronized (cache) { // check cache
BitSet cached = (BitSet) cache.get(reader);
if (shouldHaveCache) {
TestCase.assertNotNull("Cache should have data ", cached);
} else {
TestCase.assertNull("Cache should be null " + cached, cached);
}
if (cached != null) {
return cached;
}
}
final BitSet bits = filter.bits(reader);
synchronized (cache) { // update cache
cache.put(reader, bits);
}
return bits;
}
public String toString() {
return "CachingWrapperFilterHelper("+filter+")";
}
public boolean equals(Object o) {
if (!(o instanceof CachingWrapperFilterHelper)) return false;
return this.filter.equals(((CachingWrapperFilterHelper)o).filter);
}
}

View File

@@ -0,0 +1,60 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import junit.framework.TestCase;
import org.apache.lucene.index.IndexReader;
/**
* A unit test helper class to help with RemoteCachingWrapperFilter testing and
* assert that it is working correctly.
* @author Matt Ericson
*/
public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter {
private boolean shouldHaveCache;
public RemoteCachingWrapperFilterHelper(Filter filter, boolean shouldHaveCache) {
super(filter);
this.shouldHaveCache = shouldHaveCache;
}
public void shouldHaveCache(boolean shouldHaveCache) {
this.shouldHaveCache = shouldHaveCache;
}
public BitSet bits(IndexReader reader) throws IOException {
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
TestCase.assertNotNull("Filter should not be null", cachedFilter);
if (!shouldHaveCache) {
TestCase.assertSame("First time filter should be the same ", filter, cachedFilter);
} else {
TestCase.assertNotSame("We should have a cached version of the filter", filter, cachedFilter);
}
if (filter instanceof CachingWrapperFilterHelper) {
((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache);
}
return cachedFilter.bits(reader);
}
}

View File

@@ -0,0 +1,127 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.rmi.Naming;
import java.rmi.registry.LocateRegistry;
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
/**
* Tests that the Filter is cached on the searcher side of the RMI connection.
* NOTE: This is copied from TestRemoteSearchable since it already had a remote index set up.
* @author Matt Ericson
*/
public class TestRemoteCachingWrapperFilter extends TestCase {
public TestRemoteCachingWrapperFilter(String name) {
super(name);
}
private static Searchable getRemote() throws Exception {
try {
return lookupRemote();
} catch (Throwable e) {
startServer();
return lookupRemote();
}
}
private static Searchable lookupRemote() throws Exception {
return (Searchable)Naming.lookup("//localhost/Searchable");
}
private static void startServer() throws Exception {
// construct an index
RAMDirectory indexStore = new RAMDirectory();
IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true);
Document doc = new Document();
doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("type", "A", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
//Need a second document to search for
doc = new Document();
doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("type", "B", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
// publish it
LocateRegistry.createRegistry(1099);
Searchable local = new IndexSearcher(indexStore);
RemoteSearchable impl = new RemoteSearchable(local);
Naming.rebind("//localhost/Searchable", impl);
}
private static void search(Query query, Filter filter, int hitNumber, String typeValue) throws Exception {
Searchable[] searchables = { getRemote() };
Searcher searcher = new MultiSearcher(searchables);
Hits result = searcher.search(query,filter);
assertEquals(1, result.length());
Document document = result.doc(hitNumber);
assertTrue("document is null and it shouldn't be", document != null);
assertEquals(typeValue, document.get("type"));
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 3, document.getFields().size() == 3);
}
public void testTermRemoteFilter() throws Exception {
CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))));
// This is what we are fixing - if one uses a CachingWrapperFilter(Helper), it will never
// cache the filter on the remote (searcher) side
cwfh.setShouldHaveCache(false);
search(new TermQuery(new Term("test", "test")), cwfh, 0, "A");
cwfh.setShouldHaveCache(false);
search(new TermQuery(new Term("test", "test")), cwfh, 0, "A");
// This is how we fix caching - we wrap a Filter in the RemoteCachingWrapperFilter(Helper - for testing)
// to cache the Filter on the searcher (remote) side
RemoteCachingWrapperFilterHelper rcwfh = new RemoteCachingWrapperFilterHelper(cwfh, false);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
// 2nd time we do the search, we should be using the cached Filter
rcwfh.shouldHaveCache(true);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
// assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper)
// this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys,
// and Filters' hashCode() builds on Filter parameters, not the Filter instance itself
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
rcwfh.shouldHaveCache(false);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
rcwfh.shouldHaveCache(true);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
// assert that we get a non-cached version of the Filter because this is a new Query (type:b)
rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false);
rcwfh.shouldHaveCache(false);
search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B");
}
}