From 927a44881c020efd6fa79ad5633c8e96bfa716df Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 22 Apr 2016 13:08:48 +0200 Subject: [PATCH 01/18] LUCENE-7243: Removed the LeafReaderContext parameter from QueryCachingPolicy#shouldCache. --- lucene/CHANGES.txt | 3 +++ .../apache/lucene/search/LRUQueryCache.java | 4 ++-- .../lucene/search/QueryCachingPolicy.java | 18 ++++++++---------- .../UsageTrackingQueryCachingPolicy.java | 7 +------ .../lucene/search/TestIndexSearcher.java | 2 +- .../lucene/search/TestLRUQueryCache.java | 8 ++++---- .../TestUsageTrackingFilterCachingPolicy.java | 17 +---------------- .../apache/lucene/queries/TermsQueryTest.java | 16 ++-------------- .../org/apache/lucene/util/LuceneTestCase.java | 2 +- 9 files changed, 23 insertions(+), 54 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c2561b3fd2c..18409c99685 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -40,6 +40,9 @@ API Changes * LUCENE-7150: Spatial3d gets useful APIs to create common shape queries, matching LatLonPoint. (Karl Wright via Mike McCandless) +* LUCENE-7243: Removed the LeafReaderContext parameter from + QueryCachingPolicy#shouldCache. 
(Adrien Grand) + Optimizations * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10% diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index f4cf8dc6d15..15c0f2b508a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -661,7 +661,7 @@ public class LRUQueryCache implements QueryCache, Accountable { DocIdSet docIdSet = get(in.getQuery(), context); if (docIdSet == null) { - if (policy.shouldCache(in.getQuery(), context)) { + if (policy.shouldCache(in.getQuery())) { docIdSet = cache(context); putIfAbsent(in.getQuery(), context, docIdSet); } else { @@ -694,7 +694,7 @@ public class LRUQueryCache implements QueryCache, Accountable { DocIdSet docIdSet = get(in.getQuery(), context); if (docIdSet == null) { - if (policy.shouldCache(in.getQuery(), context)) { + if (policy.shouldCache(in.getQuery())) { docIdSet = cache(context); putIfAbsent(in.getQuery(), context, docIdSet); } else { diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java index c0f6aa82c79..fabd971b6e7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java +++ b/lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java @@ -19,8 +19,6 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.LeafReaderContext; - /** * A policy defining which filters should be cached. * @@ -40,7 +38,7 @@ public interface QueryCachingPolicy { public void onUse(Query query) {} @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return true; } @@ -51,12 +49,12 @@ public interface QueryCachingPolicy { * in order to make decisions. 
*/ void onUse(Query query); - /** Whether the given {@link DocIdSet} should be cached on a given segment. - * This method will be called on each leaf context to know if the filter - * should be cached on this particular leaf. The filter cache will first - * attempt to load a {@link DocIdSet} from the cache. If it is not cached - * yet and this method returns true then a cache entry will be - * generated. Otherwise an uncached set will be returned. */ - boolean shouldCache(Query query, LeafReaderContext context) throws IOException; + /** Whether the given {@link Query} is worth caching. + * This method will be called by the {@link QueryCache} to know whether to + * cache. It will first attempt to load a {@link DocIdSet} from the cache. + * If it is not cached yet and this method returns true then a + * cache entry will be generated. Otherwise an uncached scorer will be + * returned. */ + boolean shouldCache(Query query) throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java index 4eb928943e3..ab68eeb61b3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java +++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java @@ -19,17 +19,12 @@ package org.apache.lucene.search; import java.io.IOException; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.FrequencyTrackingRingBuffer; /** * A {@link QueryCachingPolicy} that tracks usage statistics of recently-used * filters in order to decide on which filters are worth caching. * - * It also uses some heuristics on segments, filters and the doc id sets that - * they produce in order to cache more aggressively when the execution cost - * significantly outweighs the caching overhead. 
- * * @lucene.experimental */ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy { @@ -128,7 +123,7 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy } @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { if (query instanceof MatchAllDocsQuery // MatchNoDocsQuery currently rewrites to a BooleanQuery, // but who knows, it might get its own Weight one day diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java b/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java index e7cdfcdd69b..8201bf81536 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java @@ -207,7 +207,7 @@ public class TestIndexSearcher extends LuceneTestCase { assertEquals(IndexSearcher.getDefaultQueryCachingPolicy(), searcher.getQueryCachingPolicy()); QueryCachingPolicy dummyPolicy = new QueryCachingPolicy() { @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return false; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 2552b04dffc..63ccdd879a3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -65,7 +65,7 @@ public class TestLRUQueryCache extends LuceneTestCase { public void onUse(Query query) {} @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return false; } @@ -455,7 +455,7 @@ public class TestLRUQueryCache extends LuceneTestCase { final 
QueryCachingPolicy countingPolicy = new QueryCachingPolicy() { @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return random().nextBoolean(); } @@ -762,7 +762,7 @@ public class TestLRUQueryCache extends LuceneTestCase { final QueryCachingPolicy policy = new QueryCachingPolicy() { @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { assertEquals(expectedCacheKey, query); return true; } @@ -1080,7 +1080,7 @@ public class TestLRUQueryCache extends LuceneTestCase { } @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return true; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index c656b855930..29ed22fb63b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -16,15 +16,8 @@ */ package org.apache.lucene.search; -import org.apache.lucene.document.Document; import org.apache.lucene.document.IntPoint; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase { @@ -41,15 +34,7 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase { for (int i = 0; i < 1000; ++i) { 
policy.onUse(q); } - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); - w.addDocument(new Document()); - IndexReader r = DirectoryReader.open(w); - assertFalse(policy.shouldCache(q, getOnlyLeafReader(r).getContext())); - - r.close(); - w.close(); - dir.close(); + assertFalse(policy.shouldCache(q)); } } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java index a87e45d23f0..71eeef45a54 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java @@ -330,24 +330,12 @@ public class TermsQueryTest extends LuceneTestCase { } public void testIsConsideredCostlyByQueryCache() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(); - IndexWriter w = new IndexWriter(dir, iwc); - Document doc = new Document(); - for (int i = 0; i < 10000; ++i) { - w.addDocument(doc); - } - w.forceMerge(1); - DirectoryReader reader = DirectoryReader.open(w); - w.close(); TermsQuery query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz")); UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy(); - assertFalse(policy.shouldCache(query, getOnlyLeafReader(reader).getContext())); + assertFalse(policy.shouldCache(query)); policy.onUse(query); policy.onUse(query); // cached after two uses - assertTrue(policy.shouldCache(query, getOnlyLeafReader(reader).getContext())); - reader.close(); - dir.close(); + assertTrue(policy.shouldCache(query)); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 70ed86d099a..52aca7e8f52 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -476,7 +476,7 @@ public abstract class LuceneTestCase extends Assert { public void onUse(Query query) {} @Override - public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { + public boolean shouldCache(Query query) throws IOException { return random().nextBoolean(); } From bf232d7635e1686cd6f5624525fa3e0b7820430f Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 22 Apr 2016 14:09:44 +0200 Subject: [PATCH 02/18] LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than waiting on a lock. --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/LRUQueryCache.java | 234 +++++++++++------- 2 files changed, 154 insertions(+), 83 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 18409c99685..4b72294a1f1 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -76,6 +76,9 @@ Optimizations * LUCENE-7238: Explicitly disable the query cache in MemoryIndex#createSearcher. (Adrien Grand) +* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than + waiting on a lock. (Adrien Grand) + Bug Fixes * LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir) diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index 15c0f2b508a..859864572b1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Predicate; import org.apache.lucene.index.LeafReader.CoreClosedListener; @@ -112,6 +113,7 @@ public class LRUQueryCache implements QueryCache, Accountable { // mostRecentlyUsedQueries. 
This is why write operations are performed under a lock private final Set mostRecentlyUsedQueries; private final Map cache; + private final ReentrantLock lock; // these variables are volatile so that we do not need to sync reads // but increments need to be performed under the lock @@ -134,6 +136,7 @@ public class LRUQueryCache implements QueryCache, Accountable { uniqueQueries = new LinkedHashMap<>(16, 0.75f, true); mostRecentlyUsedQueries = uniqueQueries.keySet(); cache = new IdentityHashMap<>(); + lock = new ReentrantLock(); ramBytesUsed = 0; } @@ -182,6 +185,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onHit(Object readerCoreKey, Query query) { + assert lock.isHeldByCurrentThread(); hitCount += 1; } @@ -191,6 +195,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onMiss(Object readerCoreKey, Query query) { + assert lock.isHeldByCurrentThread(); assert query != null; missCount += 1; } @@ -203,6 +208,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onQueryCache(Query query, long ramBytesUsed) { + assert lock.isHeldByCurrentThread(); this.ramBytesUsed += ramBytesUsed; } @@ -212,6 +218,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onQueryEviction(Query query, long ramBytesUsed) { + assert lock.isHeldByCurrentThread(); this.ramBytesUsed -= ramBytesUsed; } @@ -223,6 +230,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onDocIdSetCache(Object readerCoreKey, long ramBytesUsed) { + assert lock.isHeldByCurrentThread(); cacheSize += 1; cacheCount += 1; this.ramBytesUsed += ramBytesUsed; @@ -235,6 +243,7 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onDocIdSetEviction(Object readerCoreKey, int numEntries, 
long sumRamBytesUsed) { + assert lock.isHeldByCurrentThread(); this.ramBytesUsed -= sumRamBytesUsed; cacheSize -= numEntries; } @@ -244,12 +253,14 @@ public class LRUQueryCache implements QueryCache, Accountable { * @lucene.experimental */ protected void onClear() { + assert lock.isHeldByCurrentThread(); ramBytesUsed = 0; cacheSize = 0; } /** Whether evictions are required. */ boolean requiresEviction() { + assert lock.isHeldByCurrentThread(); final int size = mostRecentlyUsedQueries.size(); if (size == 0) { return false; @@ -258,7 +269,8 @@ public class LRUQueryCache implements QueryCache, Accountable { } } - synchronized DocIdSet get(Query key, LeafReaderContext context) { + DocIdSet get(Query key, LeafReaderContext context) { + assert lock.isHeldByCurrentThread(); assert key instanceof BoostQuery == false; assert key instanceof ConstantScoreQuery == false; final Object readerKey = context.reader().getCoreCacheKey(); @@ -282,40 +294,45 @@ public class LRUQueryCache implements QueryCache, Accountable { return cached; } - synchronized void putIfAbsent(Query query, LeafReaderContext context, DocIdSet set) { - // under a lock to make sure that mostRecentlyUsedQueries and cache remain sync'ed - // we don't want to have user-provided queries as keys in our cache since queries are mutable + void putIfAbsent(Query query, LeafReaderContext context, DocIdSet set) { assert query instanceof BoostQuery == false; assert query instanceof ConstantScoreQuery == false; - Query singleton = uniqueQueries.putIfAbsent(query, query); - if (singleton == null) { - onQueryCache(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query)); - } else { - query = singleton; + // under a lock to make sure that mostRecentlyUsedQueries and cache remain sync'ed + lock.lock(); + try { + Query singleton = uniqueQueries.putIfAbsent(query, query); + if (singleton == null) { + onQueryCache(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query)); + } else { + query = singleton; 
+ } + final Object key = context.reader().getCoreCacheKey(); + LeafCache leafCache = cache.get(key); + if (leafCache == null) { + leafCache = new LeafCache(key); + final LeafCache previous = cache.put(context.reader().getCoreCacheKey(), leafCache); + ramBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY; + assert previous == null; + // we just created a new leaf cache, need to register a close listener + context.reader().addCoreClosedListener(new CoreClosedListener() { + @Override + public void onClose(Object ownerCoreCacheKey) { + clearCoreCacheKey(ownerCoreCacheKey); + } + }); + } + leafCache.putIfAbsent(query, set); + evictIfNecessary(); + } finally { + lock.unlock(); } - final Object key = context.reader().getCoreCacheKey(); - LeafCache leafCache = cache.get(key); - if (leafCache == null) { - leafCache = new LeafCache(key); - final LeafCache previous = cache.put(context.reader().getCoreCacheKey(), leafCache); - ramBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY; - assert previous == null; - // we just created a new leaf cache, need to register a close listener - context.reader().addCoreClosedListener(new CoreClosedListener() { - @Override - public void onClose(Object ownerCoreCacheKey) { - clearCoreCacheKey(ownerCoreCacheKey); - } - }); - } - leafCache.putIfAbsent(query, set); - evictIfNecessary(); } - synchronized void evictIfNecessary() { + void evictIfNecessary() { + assert lock.isHeldByCurrentThread(); // under a lock to make sure that mostRecentlyUsedQueries and cache keep sync'ed if (requiresEviction()) { - + Iterator iterator = mostRecentlyUsedQueries.iterator(); do { final Query query = iterator.next(); @@ -337,31 +354,42 @@ public class LRUQueryCache implements QueryCache, Accountable { /** * Remove all cache entries for the given core cache key. 
*/ - public synchronized void clearCoreCacheKey(Object coreKey) { - final LeafCache leafCache = cache.remove(coreKey); - if (leafCache != null) { - ramBytesUsed -= HASHTABLE_RAM_BYTES_PER_ENTRY; - final int numEntries = leafCache.cache.size(); - if (numEntries > 0) { - onDocIdSetEviction(coreKey, numEntries, leafCache.ramBytesUsed); - } else { - assert numEntries == 0; - assert leafCache.ramBytesUsed == 0; + public void clearCoreCacheKey(Object coreKey) { + lock.lock(); + try { + final LeafCache leafCache = cache.remove(coreKey); + if (leafCache != null) { + ramBytesUsed -= HASHTABLE_RAM_BYTES_PER_ENTRY; + final int numEntries = leafCache.cache.size(); + if (numEntries > 0) { + onDocIdSetEviction(coreKey, numEntries, leafCache.ramBytesUsed); + } else { + assert numEntries == 0; + assert leafCache.ramBytesUsed == 0; + } } + } finally { + lock.unlock(); } } /** * Remove all cache entries for the given query. */ - public synchronized void clearQuery(Query query) { - final Query singleton = uniqueQueries.remove(query); - if (singleton != null) { - onEviction(singleton); + public void clearQuery(Query query) { + lock.lock(); + try { + final Query singleton = uniqueQueries.remove(query); + if (singleton != null) { + onEviction(singleton); + } + } finally { + lock.unlock(); } } private void onEviction(Query singleton) { + assert lock.isHeldByCurrentThread(); onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton)); for (LeafCache leafCache : cache.values()) { leafCache.remove(singleton); @@ -371,55 +399,70 @@ public class LRUQueryCache implements QueryCache, Accountable { /** * Clear the content of this cache. 
*/ - public synchronized void clear() { - cache.clear(); - mostRecentlyUsedQueries.clear(); - onClear(); + public void clear() { + lock.lock(); + try { + cache.clear(); + mostRecentlyUsedQueries.clear(); + onClear(); + } finally { + lock.unlock(); + } } // pkg-private for testing - synchronized void assertConsistent() { - if (requiresEviction()) { - throw new AssertionError("requires evictions: size=" + mostRecentlyUsedQueries.size() - + ", maxSize=" + maxSize + ", ramBytesUsed=" + ramBytesUsed() + ", maxRamBytesUsed=" + maxRamBytesUsed); - } - for (LeafCache leafCache : cache.values()) { - Set keys = Collections.newSetFromMap(new IdentityHashMap<>()); - keys.addAll(leafCache.cache.keySet()); - keys.removeAll(mostRecentlyUsedQueries); - if (!keys.isEmpty()) { - throw new AssertionError("One leaf cache contains more keys than the top-level cache: " + keys); + void assertConsistent() { + lock.lock(); + try { + if (requiresEviction()) { + throw new AssertionError("requires evictions: size=" + mostRecentlyUsedQueries.size() + + ", maxSize=" + maxSize + ", ramBytesUsed=" + ramBytesUsed() + ", maxRamBytesUsed=" + maxRamBytesUsed); } - } - long recomputedRamBytesUsed = - HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size() - + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size(); - for (Query query : mostRecentlyUsedQueries) { - recomputedRamBytesUsed += ramBytesUsed(query); - } - for (LeafCache leafCache : cache.values()) { - recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size(); - for (DocIdSet set : leafCache.cache.values()) { - recomputedRamBytesUsed += set.ramBytesUsed(); + for (LeafCache leafCache : cache.values()) { + Set keys = Collections.newSetFromMap(new IdentityHashMap<>()); + keys.addAll(leafCache.cache.keySet()); + keys.removeAll(mostRecentlyUsedQueries); + if (!keys.isEmpty()) { + throw new AssertionError("One leaf cache contains more keys than the top-level cache: " + keys); + } + } + long recomputedRamBytesUsed = + 
HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size() + + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size(); + for (Query query : mostRecentlyUsedQueries) { + recomputedRamBytesUsed += ramBytesUsed(query); + } + for (LeafCache leafCache : cache.values()) { + recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size(); + for (DocIdSet set : leafCache.cache.values()) { + recomputedRamBytesUsed += set.ramBytesUsed(); + } + } + if (recomputedRamBytesUsed != ramBytesUsed) { + throw new AssertionError("ramBytesUsed mismatch : " + ramBytesUsed + " != " + recomputedRamBytesUsed); } - } - if (recomputedRamBytesUsed != ramBytesUsed) { - throw new AssertionError("ramBytesUsed mismatch : " + ramBytesUsed + " != " + recomputedRamBytesUsed); - } - long recomputedCacheSize = 0; - for (LeafCache leafCache : cache.values()) { - recomputedCacheSize += leafCache.cache.size(); - } - if (recomputedCacheSize != getCacheSize()) { - throw new AssertionError("cacheSize mismatch : " + getCacheSize() + " != " + recomputedCacheSize); + long recomputedCacheSize = 0; + for (LeafCache leafCache : cache.values()) { + recomputedCacheSize += leafCache.cache.size(); + } + if (recomputedCacheSize != getCacheSize()) { + throw new AssertionError("cacheSize mismatch : " + getCacheSize() + " != " + recomputedCacheSize); + } + } finally { + lock.unlock(); } } // pkg-private for testing // return the list of cached queries in LRU order - synchronized List cachedQueries() { - return new ArrayList<>(mostRecentlyUsedQueries); + List cachedQueries() { + lock.lock(); + try { + return new ArrayList<>(mostRecentlyUsedQueries); + } finally { + lock.unlock(); + } } @Override @@ -438,8 +481,11 @@ public class LRUQueryCache implements QueryCache, Accountable { @Override public Collection getChildResources() { - synchronized (this) { + lock.lock(); + try { return Accountables.namedAccountables("segment", cache); + } finally { + lock.unlock(); } } @@ -659,7 +705,18 @@ public class LRUQueryCache 
implements QueryCache, Accountable { return in.scorer(context); } - DocIdSet docIdSet = get(in.getQuery(), context); + // If the lock is already busy, prefer using the uncached version than waiting + if (lock.tryLock() == false) { + return in.scorer(context); + } + + DocIdSet docIdSet; + try { + docIdSet = get(in.getQuery(), context); + } finally { + lock.unlock(); + } + if (docIdSet == null) { if (policy.shouldCache(in.getQuery())) { docIdSet = cache(context); @@ -692,7 +749,18 @@ public class LRUQueryCache implements QueryCache, Accountable { return in.bulkScorer(context); } - DocIdSet docIdSet = get(in.getQuery(), context); + // If the lock is already busy, prefer using the uncached version than waiting + if (lock.tryLock() == false) { + return in.bulkScorer(context); + } + + DocIdSet docIdSet; + try { + docIdSet = get(in.getQuery(), context); + } finally { + lock.unlock(); + } + if (docIdSet == null) { if (policy.shouldCache(in.getQuery())) { docIdSet = cache(context); From 776f9ec7c8f2a3a07c5ce5229c66c2f113291ba9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 Apr 2016 12:08:16 -0400 Subject: [PATCH 03/18] LUCENE-7242: LatLonTree should build a balanced tree --- lucene/CHANGES.txt | 3 +- .../apache/lucene/document/LatLonTree.java | 71 +++++++++---------- 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4b72294a1f1..848e0220839 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -64,8 +64,7 @@ Optimizations multiple polygons and holes, with memory usage independent of polygon complexity. (Karl Wright, Mike McCandless, Robert Muir) -* LUCENE-7159, LUCENE-7222, LUCENE-7229, LUCENE-7239: Speed up LatLonPoint - polygon performance. (Robert Muir) +* LUCENE-7159: Speed up LatLonPoint polygon performance. (Robert Muir, Ryan Ernst) * LUCENE-7211: Reduce memory & GC for spatial RPT Intersects when the number of matching docs is small. 
(Jeff Wartes, David Smiley) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java index 8a6e6d8264a..f7a2927e2fb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java @@ -16,17 +16,13 @@ */ package org.apache.lucene.document; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Random; import org.apache.lucene.geo.Polygon; import org.apache.lucene.index.PointValues.Relation; /** - * 2D polygon implementation represented as a randomized interval tree of edges. + * 2D polygon implementation represented as a balanced interval tree of edges. *

* contains() and crosses() are still O(n), but for most practical polygons * are much faster than brute force. @@ -338,45 +334,44 @@ final class LatLonTree { * @return root node of the tree. */ private static Edge createTree(double polyLats[], double polyLons[]) { - // edge order is deterministic and reproducible based on the double values. - // TODO: make a real balanced tree instead :) - List list = new ArrayList(polyLats.length - 1); + Edge edges[] = new Edge[polyLats.length - 1]; for (int i = 1; i < polyLats.length; i++) { - list.add(i); - } - Collections.shuffle(list, new Random(Arrays.hashCode(polyLats) ^ Arrays.hashCode(polyLons))); - Edge root = null; - for (int i : list) { double lat1 = polyLats[i-1]; double lon1 = polyLons[i-1]; double lat2 = polyLats[i]; double lon2 = polyLons[i]; - Edge newNode = new Edge(lat1, lon1, lat2, lon2, Math.min(lat1, lat2), Math.max(lat1, lat2)); - if (root == null) { - // add first node - root = newNode; - } else { - // traverse tree to find home for new node, along the path updating all parent's max value along the way. 
- Edge node = root; - while (true) { - node.max = Math.max(node.max, newNode.max); - if (newNode.low < node.low) { - if (node.left == null) { - node.left = newNode; - break; - } - node = node.left; - } else { - if (node.right == null) { - node.right = newNode; - break; - } - node = node.right; - } - } - } + edges[i - 1] = new Edge(lat1, lon1, lat2, lon2, Math.min(lat1, lat2), Math.max(lat1, lat2)); } - return root; + // sort the edges then build a balanced tree from them + Arrays.sort(edges, (left, right) -> { + int ret = Double.compare(left.low, right.low); + if (ret == 0) { + ret = Double.compare(left.max, right.max); + } + return ret; + }); + return createTree(edges, 0, edges.length - 1); + } + + /** Creates tree from sorted edges (with range low and high inclusive) */ + private static Edge createTree(Edge edges[], int low, int high) { + if (low > high) { + return null; + } + // add midpoint + int mid = (low + high) >>> 1; + Edge newNode = edges[mid]; + // add children + newNode.left = createTree(edges, low, mid - 1); + newNode.right = createTree(edges, mid + 1, high); + // pull up max values to this node + if (newNode.left != null) { + newNode.max = Math.max(newNode.max, newNode.left.max); + } + if (newNode.right != null) { + newNode.max = Math.max(newNode.max, newNode.right.max); + } + return newNode; } /** From 666472b74f2063a2a894837ee3768335bcf7f36a Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Fri, 22 Apr 2016 18:21:41 +0100 Subject: [PATCH 04/18] SOLR-9025: Add SolrCoreTest.testImplicitPlugins test. 
--- .../org/apache/solr/core/SolrCoreTest.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java index 54827079204..200935a107c 100644 --- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java +++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java @@ -29,6 +29,7 @@ import org.apache.solr.util.plugin.SolrCoreAware; import org.junit.Test; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; @@ -68,6 +69,45 @@ public class SolrCoreTest extends SolrTestCaseJ4 { assertEquals( core.getRequestHandlers().get( path ), handler2 ); } + @Test + public void testImplicitPlugins() { + final SolrCore core = h.getCore(); + final List implicitHandlers = core.getImplicitHandlers(); + + final Map pathToClassMap = new HashMap<>(implicitHandlers.size()); + for (PluginInfo implicitHandler : implicitHandlers) { + assertEquals("wrong type for "+implicitHandler.toString(), + SolrRequestHandler.TYPE, implicitHandler.type); + pathToClassMap.put(implicitHandler.name, implicitHandler.className); + } + + int ihCount = 0; + { + ++ihCount; assertEquals(pathToClassMap.get("/admin/file"), "solr.ShowFileRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/logging"), "solr.LoggingHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/luke"), "solr.LukeRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/mbeans"), "solr.SolrInfoMBeanHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/ping"), "solr.PingRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/plugins"), "solr.PluginInfoHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/properties"), "solr.PropertiesRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/segments"), "solr.SegmentsInfoRequestHandler"); + 
++ihCount; assertEquals(pathToClassMap.get("/admin/system"), "solr.SystemInfoHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/replication"), "solr.ReplicationHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/schema"), "solr.SchemaHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/sql"), "solr.SQLHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/stream"), "solr.StreamHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/update"), "solr.UpdateRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/update/csv"), "solr.UpdateRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/update/json"), "solr.UpdateRequestHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/update/json/docs"), "solr.UpdateRequestHandler"); + } + assertEquals("wrong number of implicit handlers", ihCount, implicitHandlers.size()); + } + @Test public void testClose() throws Exception { final CoreContainer cores = h.getCoreContainer(); From 88c9da6c899c7015f6c9a818a4a4f91984022254 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 Apr 2016 15:10:39 -0400 Subject: [PATCH 05/18] LUCENE-7249: LatLonPoint polygon should use tree relate() --- .../src/java/org/apache/lucene/document/LatLonGrid.java | 4 ++-- .../apache/lucene/document/LatLonPointInPolygonQuery.java | 5 +++-- .../src/test/org/apache/lucene/document/TestLatLonGrid.java | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java index 2083f03fd1b..4b3b2b2acb9 100644 --- 
a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java @@ -62,12 +62,12 @@ final class LatLonGrid { final LatLonTree[] tree; - LatLonGrid(int minLat, int maxLat, int minLon, int maxLon, Polygon... polygons) { + LatLonGrid(int minLat, int maxLat, int minLon, int maxLon, LatLonTree[] tree) { this.minLat = minLat; this.maxLat = maxLat; this.minLon = minLon; this.maxLon = maxLon; - this.tree = LatLonTree.build(polygons); + this.tree = tree; if (minLon > maxLon) { // maybe make 2 grids if you want this? throw new IllegalArgumentException("Grid cannot cross the dateline"); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index 15361b5640a..23a98d22c77 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -92,10 +92,11 @@ final class LatLonPointInPolygonQuery extends Query { NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0); NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); + final LatLonTree[] tree = LatLonTree.build(polygons); final LatLonGrid grid = new LatLonGrid(encodeLatitude(box.minLat), encodeLatitude(box.maxLat), encodeLongitude(box.minLon), - encodeLongitude(box.maxLon), polygons); + encodeLongitude(box.maxLon), tree); return new ConstantScoreWeight(this) { @@ -156,7 +157,7 @@ final class LatLonPointInPolygonQuery extends Query { double cellMaxLat = decodeLatitude(maxPackedValue, 0); double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES); - return Polygon.relate(polygons, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon); + return LatLonTree.relate(tree, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon); } }); diff --git 
a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java index 891e3d52fba..0c185ea366d 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java @@ -40,7 +40,7 @@ public class TestLatLonGrid extends LuceneTestCase { int maxLat = encodeLatitude(box.maxLat); int minLon = encodeLongitude(box.minLon); int maxLon = encodeLongitude(box.maxLon); - LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, polygon); + LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, LatLonTree.build(polygon)); // we are in integer space... but exhaustive testing is slow! for (int j = 0; j < 10000; j++) { int lat = TestUtil.nextInt(random(), minLat, maxLat); @@ -79,7 +79,7 @@ public class TestLatLonGrid extends LuceneTestCase { int maxLat = encodeLatitude(box.maxLat); int minLon = encodeLongitude(box.minLon); int maxLon = encodeLongitude(box.maxLon); - LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, polygon); + LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, LatLonTree.build(polygon)); // we are in integer space... but exhaustive testing is slow! 
for (int j = 0; j < 1000; j++) { int lat = TestUtil.nextInt(random(), minLat, maxLat); @@ -99,7 +99,7 @@ public class TestLatLonGrid extends LuceneTestCase { double ONE = decodeLatitude(1); Polygon tiny = new Polygon(new double[] { ZERO, ZERO, ONE, ONE, ZERO }, new double[] { ZERO, ONE, ONE, ZERO, ZERO }); for (int max = 1; max < 500000; max++) { - LatLonGrid grid = new LatLonGrid(0, max, 0, max, tiny); + LatLonGrid grid = new LatLonGrid(0, max, 0, max, LatLonTree.build(tiny)); assertEquals(tiny.contains(decodeLatitude(max), decodeLongitude(max)), grid.contains(max, max)); } } From 38ebd906e830e793d7df364163f0baab049ffa47 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Fri, 22 Apr 2016 16:35:51 -0400 Subject: [PATCH 06/18] LUCENE-7244: Complain if the holes are outside the polygon. --- .../spatial3d/geom/GeoConcavePolygon.java | 35 +++++++++++-------- .../spatial3d/geom/GeoConvexPolygon.java | 35 +++++++++++-------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java index 124b46bf2ae..8eaea1a4937 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java @@ -207,21 +207,6 @@ class GeoConcavePolygon extends GeoBasePolygon { invertedEdges[i] = new SidedPlane(edges[i]); notableEdgePoints[i] = new GeoPoint[]{start, end}; } - /* Disable since GeoPolygonFactory does this too. - // In order to naively confirm that the polygon is concave, I would need to - // check every edge, and verify that every point (other than the edge endpoints) - // is within the edge's sided plane. This is an order n^2 operation. That's still - // not wrong, though, because everything else about polygons has a similar cost. 
- for (int edgeIndex = 0; edgeIndex < edges.length; edgeIndex++) { - final SidedPlane edge = edges[edgeIndex]; - for (int pointIndex = 0; pointIndex < points.size(); pointIndex++) { - if (pointIndex != edgeIndex && pointIndex != legalIndex(edgeIndex + 1)) { - if (edge.isWithin(points.get(pointIndex))) - throw new IllegalArgumentException("Polygon is not concave: Point " + points.get(pointIndex) + " Edge " + edge); - } - } - } - */ // For each edge, create a bounds object. eitherBounds = new HashMap<>(edges.length); @@ -241,6 +226,26 @@ class GeoConcavePolygon extends GeoBasePolygon { // Pick an edge point arbitrarily edgePoints = new GeoPoint[]{points.get(0)}; + + if (isWithinHoles(points.get(0))) { + throw new IllegalArgumentException("Polygon edge intersects a polygon hole; not allowed"); + } + + } + + /** Check if a point is within the provided holes. + *@param point point to check. + *@return true if the point is within any of the holes. + */ + protected boolean isWithinHoles(final GeoPoint point) { + if (holes != null) { + for (final GeoPolygon hole : holes) { + if (hole.isWithin(point)) { + return true; + } + } + } + return false; } /** Compute a legal point index from a possibly illegal one, that may have wrapped. diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java index 64aa7c45f96..17a2120db3f 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java @@ -203,21 +203,6 @@ class GeoConvexPolygon extends GeoBasePolygon { edges[i] = sp; notableEdgePoints[i] = new GeoPoint[]{start, end}; } - /* Disabled since GeoPolygonFactory does the checking too. 
- // In order to naively confirm that the polygon is convex, I would need to - // check every edge, and verify that every point (other than the edge endpoints) - // is within the edge's sided plane. This is an order n^2 operation. That's still - // not wrong, though, because everything else about polygons has a similar cost. - for (int edgeIndex = 0; edgeIndex < edges.length; edgeIndex++) { - final SidedPlane edge = edges[edgeIndex]; - for (int pointIndex = 0; pointIndex < points.size(); pointIndex++) { - if (pointIndex != edgeIndex && pointIndex != legalIndex(edgeIndex + 1)) { - if (!edge.isWithin(points.get(pointIndex))) - throw new IllegalArgumentException("Polygon is not convex: Point " + points.get(pointIndex) + " Edge " + edge); - } - } - } - */ // For each edge, create a bounds object. eitherBounds = new HashMap<>(edges.length); @@ -236,8 +221,28 @@ class GeoConvexPolygon extends GeoBasePolygon { // Pick an edge point arbitrarily edgePoints = new GeoPoint[]{points.get(0)}; + + if (isWithinHoles(points.get(0))) { + throw new IllegalArgumentException("Polygon edge intersects a polygon hole; not allowed"); + } + } + /** Check if a point is within the provided holes. + *@param point point to check. + *@return true if the point is within any of the holes. + */ + protected boolean isWithinHoles(final GeoPoint point) { + if (holes != null) { + for (final GeoPolygon hole : holes) { + if (hole.isWithin(point)) { + return true; + } + } + } + return false; + } + /** Compute a legal point index from a possibly illegal one, that may have wrapped. *@param index is the index. *@return the normalized index. From 97e8f1aeadd29207b8fdc6284ec7b6e4c60cce11 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Sat, 23 Apr 2016 06:44:42 -0400 Subject: [PATCH 07/18] LUCENE-7250: Handle holes properly for distance and relationship calculation. 
--- .../apache/lucene/spatial3d/Geo3DPoint.java | 17 ++++------ .../spatial3d/geom/GeoConcavePolygon.java | 33 ++++++++++++++++--- .../spatial3d/geom/GeoConvexPolygon.java | 33 ++++++++++++++++--- .../spatial3d/geom/GeoPolygonFactory.java | 6 ++-- 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/Geo3DPoint.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/Geo3DPoint.java index 4b5ab92764a..0f2395c890b 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/Geo3DPoint.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/Geo3DPoint.java @@ -142,7 +142,7 @@ public final class Geo3DPoint extends Field { } final GeoShape shape; if (polygons.length == 1) { - final GeoShape component = fromPolygon(polygons[0], false); + final GeoShape component = fromPolygon(polygons[0]); if (component == null) { // Polygon is degenerate shape = new GeoCompositePolygon(); @@ -152,7 +152,7 @@ public final class Geo3DPoint extends Field { } else { final GeoCompositePolygon poly = new GeoCompositePolygon(); for (final Polygon p : polygons) { - final GeoPolygon component = fromPolygon(p, false); + final GeoPolygon component = fromPolygon(p); if (component != null) { poly.addShape(component); } @@ -192,17 +192,16 @@ public final class Geo3DPoint extends Field { * Convert a Polygon object into a GeoPolygon. * This method uses * @param polygon is the Polygon object. - * @param reverseMe is true if the order of the points should be reversed. * @return the GeoPolygon. */ - private static GeoPolygon fromPolygon(final Polygon polygon, final boolean reverseMe) { + private static GeoPolygon fromPolygon(final Polygon polygon) { // First, assemble the "holes". The geo3d convention is to use the same polygon sense on the inner ring as the // outer ring, so we process these recursively with reverseMe flipped. 
final Polygon[] theHoles = polygon.getHoles(); final List holeList = new ArrayList<>(theHoles.length); for (final Polygon hole : theHoles) { //System.out.println("Hole: "+hole); - final GeoPolygon component = fromPolygon(hole, !reverseMe); + final GeoPolygon component = fromPolygon(hole); if (component != null) { holeList.add(component); } @@ -216,12 +215,8 @@ public final class Geo3DPoint extends Field { final List points = new ArrayList<>(polyLats.length-1); // We skip the last point anyway because the API requires it to be repeated, and geo3d doesn't repeat it. for (int i = 0; i < polyLats.length - 1; i++) { - if (reverseMe) { - points.add(new GeoPoint(PlanetModel.WGS84, fromDegrees(polyLats[i]), fromDegrees(polyLons[i]))); - } else { - final int index = polyLats.length - 2 - i; - points.add(new GeoPoint(PlanetModel.WGS84, fromDegrees(polyLats[index]), fromDegrees(polyLons[index]))); - } + final int index = polyLats.length - 2 - i; + points.add(new GeoPoint(PlanetModel.WGS84, fromDegrees(polyLats[index]), fromDegrees(polyLons[index]))); } //System.err.println(" building polygon with "+points.size()+" points..."); final GeoPolygon rval = GeoPolygonFactory.makeGeoPolygon(PlanetModel.WGS84, points, holeList); diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java index 8eaea1a4937..c18d40f75d4 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConcavePolygon.java @@ -224,8 +224,25 @@ class GeoConcavePolygon extends GeoBasePolygon { eitherBounds.put(edge, new EitherBound(invertedEdges[legalIndex(bound1Index)], invertedEdges[legalIndex(bound2Index)])); } - // Pick an edge point arbitrarily - edgePoints = new GeoPoint[]{points.get(0)}; + // Pick an edge point arbitrarily from the outer polygon. 
Glom this together with all edge points from + // inner polygons. + int edgePointCount = 1; + if (holes != null) { + for (final GeoPolygon hole : holes) { + edgePointCount += hole.getEdgePoints().length; + } + } + edgePoints = new GeoPoint[edgePointCount]; + edgePointCount = 0; + edgePoints[edgePointCount++] = points.get(0); + if (holes != null) { + for (final GeoPolygon hole : holes) { + final GeoPoint[] holeEdgePoints = hole.getEdgePoints(); + for (final GeoPoint p : holeEdgePoints) { + edgePoints[edgePointCount++] = p; + } + } + } if (isWithinHoles(points.get(0))) { throw new IllegalArgumentException("Polygon edge intersects a polygon hole; not allowed"); @@ -240,7 +257,7 @@ class GeoConcavePolygon extends GeoBasePolygon { protected boolean isWithinHoles(final GeoPoint point) { if (holes != null) { for (final GeoPolygon hole : holes) { - if (hole.isWithin(point)) { + if (!hole.isWithin(point)) { return true; } } @@ -268,7 +285,7 @@ class GeoConcavePolygon extends GeoBasePolygon { } if (holes != null) { for (final GeoPolygon polygon : holes) { - if (polygon.isWithin(x, y, z)) { + if (!polygon.isWithin(x, y, z)) { return false; } } @@ -405,6 +422,14 @@ class GeoConcavePolygon extends GeoBasePolygon { minimumDistance = newDist; } } + if (holes != null) { + for (final GeoPolygon hole : holes) { + double holeDistance = hole.computeOutsideDistance(distanceStyle, x, y, z); + if (holeDistance != 0.0 && holeDistance < minimumDistance) { + minimumDistance = holeDistance; + } + } + } return minimumDistance; } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java index 17a2120db3f..6f71d18bb5b 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoConvexPolygon.java @@ -219,8 +219,25 @@ class GeoConvexPolygon extends GeoBasePolygon { 
eitherBounds.put(edge, new EitherBound(edges[legalIndex(bound1Index)], edges[legalIndex(bound2Index)])); } - // Pick an edge point arbitrarily - edgePoints = new GeoPoint[]{points.get(0)}; + // Pick an edge point arbitrarily from the outer polygon. Glom this together with all edge points from + // inner polygons. + int edgePointCount = 1; + if (holes != null) { + for (final GeoPolygon hole : holes) { + edgePointCount += hole.getEdgePoints().length; + } + } + edgePoints = new GeoPoint[edgePointCount]; + edgePointCount = 0; + edgePoints[edgePointCount++] = points.get(0); + if (holes != null) { + for (final GeoPolygon hole : holes) { + final GeoPoint[] holeEdgePoints = hole.getEdgePoints(); + for (final GeoPoint p : holeEdgePoints) { + edgePoints[edgePointCount++] = p; + } + } + } if (isWithinHoles(points.get(0))) { throw new IllegalArgumentException("Polygon edge intersects a polygon hole; not allowed"); @@ -235,7 +252,7 @@ class GeoConvexPolygon extends GeoBasePolygon { protected boolean isWithinHoles(final GeoPoint point) { if (holes != null) { for (final GeoPolygon hole : holes) { - if (hole.isWithin(point)) { + if (!hole.isWithin(point)) { return true; } } @@ -263,7 +280,7 @@ class GeoConvexPolygon extends GeoBasePolygon { } if (holes != null) { for (final GeoPolygon polygon : holes) { - if (polygon.isWithin(x, y, z)) { + if (!polygon.isWithin(x, y, z)) { return false; } } @@ -393,6 +410,14 @@ class GeoConvexPolygon extends GeoBasePolygon { minimumDistance = newDist; } } + if (holes != null) { + for (final GeoPolygon hole : holes) { + double holeDistance = hole.computeOutsideDistance(distanceStyle, x, y, z); + if (holeDistance != 0.0 && holeDistance < minimumDistance) { + minimumDistance = holeDistance; + } + } + } return minimumDistance; } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java index 99fc7c90e4d..609c8643c62 100755 --- 
a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoPolygonFactory.java @@ -56,7 +56,8 @@ public class GeoPolygonFactory { * @param pointList is a list of the GeoPoints to build an arbitrary polygon out of. If points go * clockwise from a given pole, then that pole should be within the polygon. If points go * counter-clockwise, then that pole should be outside the polygon. - * @param holes is a list of polygons representing "holes" in the outside polygon. Null == none. + * @param holes is a list of polygons representing "holes" in the outside polygon. Holes describe the area outside + * each hole as being "in set". Null == none. * @return a GeoPolygon corresponding to what was specified, or null if a valid polygon cannot be generated * from this input. */ @@ -73,7 +74,8 @@ public class GeoPolygonFactory { * @param pointList is a list of the GeoPoints to build an arbitrary polygon out of. If points go * clockwise from a given pole, then that pole should be within the polygon. If points go * counter-clockwise, then that pole should be outside the polygon. - * @param holes is a list of polygons representing "holes" in the outside polygon. Null == none. + * @param holes is a list of polygons representing "holes" in the outside polygon. Holes describe the area outside + * each hole as being "in set". Null == none. * @param leniencyValue is the maximum distance (in units) that a point can be from the plane and still be considered as * belonging to the plane. 
Any value greater than zero may cause some of the provided points that are in fact outside * the strict definition of co-planarity, but are within this distance, to be discarded for the purposes of creating a From 69be7dc2a3ebe9e5170c8d0c5079d863ce73dab6 Mon Sep 17 00:00:00 2001 From: Karl Wright Date: Sun, 24 Apr 2016 02:23:05 -0400 Subject: [PATCH 08/18] Two nested classes made private that should never have been public. --- .../org/apache/lucene/spatial3d/geom/GeoStandardPath.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoStandardPath.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoStandardPath.java index 312b79e4a25..b24d5afb913 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoStandardPath.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoStandardPath.java @@ -336,7 +336,7 @@ class GeoStandardPath extends GeoBasePath { * we generate no circle at all. If there is one intersection only, then we generate a plane that includes that intersection, as well as the remaining * cutoff plane/edge plane points. */ - public static class SegmentEndpoint { + private static class SegmentEndpoint { /** The center point of the endpoint */ public final GeoPoint point; /** A plane describing the circle */ @@ -580,7 +580,7 @@ class GeoStandardPath extends GeoBasePath { /** * This is the pre-calculated data for a path segment. 
*/ - public static class PathSegment { + private static class PathSegment { /** Starting point of the segment */ public final GeoPoint start; /** End point of the segment */ From 7acf8babae2401300a5844ebd10da2219c99dd40 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Sun, 24 Apr 2016 05:58:41 -0400 Subject: [PATCH 09/18] LUCENE-7175: give enough heap for large dim count, bytes per dim, when writing points --- .../src/test/org/apache/lucene/search/TestPointQueries.java | 2 +- .../src/java/org/apache/lucene/index/RandomCodec.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java index bc9ef759a18..88d89d29417 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java @@ -1153,7 +1153,7 @@ public class TestPointQueries extends LuceneTestCase { private static Codec getCodec() { if (Codec.getDefault().getName().equals("Lucene60")) { int maxPointsInLeafNode = TestUtil.nextInt(random(), 16, 2048); - double maxMBSortInHeap = 4.0 + (3*random().nextDouble()); + double maxMBSortInHeap = 5.0 + (3*random().nextDouble()); if (VERBOSE) { System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index 73db88b1067..c1c33f895c2 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -196,7 +196,7 @@ public class RandomCodec extends AssertingCodec { int lowFreqCutoff = TestUtil.nextInt(random, 2, 100); maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048); - maxMBSortInHeap 
= 4.0 + (3*random.nextDouble()); + maxMBSortInHeap = 5.0 + (3*random.nextDouble()); bkdSplitRandomSeed = random.nextInt(); add(avoidCodecs, From 45c48da54ad2bf09fd4c9559ba1c776ad9460d82 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 24 Apr 2016 17:15:30 -0400 Subject: [PATCH 10/18] LUCENE-7240: Remove DocValues from LatLonPoint, add DocValuesField for that --- lucene/CHANGES.txt | 4 +- .../lucene/document/LatLonDocValuesField.java | 135 ++++++++++++++++++ .../apache/lucene/document/LatLonPoint.java | 65 +++------ .../LatLonPointDistanceComparator.java | 2 +- .../document/TestLatLonDocValuesField.java | 30 ++++ .../lucene/document/TestLatLonPoint.java | 3 - .../document/TestLatLonPointDistanceSort.java | 20 +-- .../apache/lucene/document/TestNearest.java | 3 +- 8 files changed, 197 insertions(+), 65 deletions(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java create mode 100644 lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 848e0220839..dc4cfcba4d5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -10,8 +10,8 @@ http://s.apache.org/luceneversions New Features -* LUCENE-7099: Add LatLonPoint.newDistanceSort to the sandbox's - LatLonPoint. (Robert Muir) +* LUCENE-7099: Add LatLonDocValuesField.newDistanceSort to the sandbox. + (Robert Muir) * LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via Mike McCandless) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java new file mode 100644 index 00000000000..20154d25505 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.SortField; + +/** + * An per-document location field. + *

+ * Sorting by distance is efficient. Multiple values for the same field in one document + * is allowed. + *

+ * This field defines static factory methods for common operations: + *

    + *
  • {@link #newDistanceSort newDistanceSort()} for ordering documents by distance from a specified location. + *
+ *

+ * If you also need query operations, you should add a separate {@link LatLonPoint} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *

+ * WARNING: Values are indexed with some loss of precision from the + * original {@code double} values (4.190951585769653E-8 for the latitude component + * and 8.381903171539307E-8 for longitude). + * @see LatLonPoint + */ +public class LatLonDocValuesField extends Field { + + /** + * Type for a LatLonDocValuesField + *

+ * Each value stores a 64-bit long where the upper 32 bits are the encoded latitude, + * and the lower 32 bits are the encoded longitude. + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) + */ + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); + TYPE.freeze(); + } + + /** + * Creates a new LatLonDocValuesField with the specified latitude and longitude + * @param name field name + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if the field name is null or latitude or longitude are out of bounds + */ + public LatLonDocValuesField(String name, double latitude, double longitude) { + super(name, TYPE); + setLocationValue(latitude, longitude); + } + + /** + * Change the values of this field + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if latitude or longitude are out of bounds + */ + public void setLocationValue(double latitude, double longitude) { + int latitudeEncoded = encodeLatitude(latitude); + int longitudeEncoded = encodeLongitude(longitude); + fieldsData = Long.valueOf((((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL)); + } + + /** helper: checks a fieldinfo and throws exception if its definitely not a LatLonDocValuesField */ + static void checkCompatible(FieldInfo fieldInfo) { + // dv properties could be "unset", if you e.g. used only StoredField with this same name in the segment. 
+ if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { + throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + + " but this type has docValuesType=" + TYPE.docValuesType() + + ", is the field really a LatLonDocValuesField?"); + } + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(getClass().getSimpleName()); + result.append(" <"); + result.append(name); + result.append(':'); + + long currentValue = Long.valueOf((Long)fieldsData); + result.append(decodeLatitude((int)(currentValue >> 32))); + result.append(','); + result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF))); + + result.append('>'); + return result.toString(); + } + + /** + * Creates a SortField for sorting by distance from a location. + *

+ * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for + * the hits contains a Double instance with the distance in meters. + *

+ * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance + * (missing values sort last). + *

+ * If a document contains multiple values for the field, the closest distance to the location is used. + * + * @param field field name. must not be null. + * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. + * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. + * @return SortField ordering documents by distance + * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. + */ + public static SortField newDistanceSort(String field, double latitude, double longitude) { + return new LatLonPointSortField(field, latitude, longitude); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index 0f6afe9116f..426a7022a6a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat; import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.Polygon; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -38,7 +37,6 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -63,21 +61,23 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil; *

    *
  • {@link #newBoxQuery newBoxQuery()} for matching points within a bounding box. *
  • {@link #newDistanceQuery newDistanceQuery()} for matching points within a specified distance. - *
  • {@link #newDistanceSort newDistanceSort()} for ordering documents by distance from a specified location. *
  • {@link #newPolygonQuery newPolygonQuery()} for matching points within an arbitrary polygon. *
  • {@link #nearest nearest()} for finding the k-nearest neighbors by distance. *
*

+ * If you also need per-document operations such as sort by distance, add a separate {@link LatLonDocValuesField} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *

* WARNING: Values are indexed with some loss of precision from the * original {@code double} values (4.190951585769653E-8 for the latitude component * and 8.381903171539307E-8 for longitude). * @see PointValues + * @see LatLonDocValuesField */ // TODO ^^^ that is very sandy and hurts the API, usage, and tests tremendously, because what the user passes // to the field is not actually what gets indexed. Float would be 1E-5 error vs 1E-7, but it might be // a better tradeoff? then it would be completely transparent to the user and lucene would be "lossless". public class LatLonPoint extends Field { - private long currentValue; /** * Type for an indexed LatLonPoint @@ -87,7 +87,6 @@ public class LatLonPoint extends Field { public static final FieldType TYPE = new FieldType(); static { TYPE.setDimensions(2, Integer.BYTES); - TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); TYPE.freeze(); } @@ -98,13 +97,19 @@ public class LatLonPoint extends Field { * @throws IllegalArgumentException if latitude or longitude are out of bounds */ public void setLocationValue(double latitude, double longitude) { - byte[] bytes = new byte[8]; + final byte[] bytes; + + if (fieldsData == null) { + bytes = new byte[8]; + fieldsData = new BytesRef(bytes); + } else { + bytes = ((BytesRef) fieldsData).bytes; + } + int latitudeEncoded = encodeLatitude(latitude); int longitudeEncoded = encodeLongitude(longitude); NumericUtils.intToSortableBytes(latitudeEncoded, bytes, 0); NumericUtils.intToSortableBytes(longitudeEncoded, bytes, Integer.BYTES); - fieldsData = new BytesRef(bytes); - currentValue = (((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL); } /** @@ -127,24 +132,14 @@ public class LatLonPoint extends Field { result.append(name); result.append(':'); - result.append(decodeLatitude((int)(currentValue >> 32))); + byte bytes[] = ((BytesRef) fieldsData).bytes; + result.append(decodeLatitude(bytes, 0)); result.append(','); - result.append(decodeLongitude((int)(currentValue & 
0xFFFFFFFF))); + result.append(decodeLongitude(bytes, Integer.BYTES)); result.append('>'); return result.toString(); } - - /** - * Returns a 64-bit long, where the upper 32 bits are the encoded latitude, - * and the lower 32 bits are the encoded longitude. - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) - */ - @Override - public Number numericValue() { - return currentValue; - } /** sugar encodes a single point as a byte array */ private static byte[] encode(double latitude, double longitude) { @@ -175,11 +170,6 @@ public class LatLonPoint extends Field { " but this point type has bytesPerDim=" + TYPE.pointNumBytes() + ", is the field really a LatLonPoint?"); } - if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { - throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + - " but this point type has docValuesType=" + TYPE.docValuesType() + - ", is the field really a LatLonPoint?"); - } } // static methods for generating queries @@ -278,31 +268,10 @@ public class LatLonPoint extends Field { return new LatLonPointInPolygonQuery(field, polygons); } - /** - * Creates a SortField for sorting by distance from a location. - *

- * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for - * the hits contains a Double instance with the distance in meters. - *

- * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance - * (missing values sort last). - *

- * If a document contains multiple values for the field, the closest distance to the location is used. - * - * @param field field name. must not be null. - * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. - * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. - * @return SortField ordering documents by distance - * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. - */ - public static SortField newDistanceSort(String field, double latitude, double longitude) { - return new LatLonPointSortField(field, latitude, longitude); - } - /** * Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance. *

- * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link #newDistanceSort}, + * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort}, * but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this * only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are * currently not de-duplicated, so if a document had multiple instances of the specified field that diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java index 0306489da3c..0b1d0c7d848 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java @@ -158,7 +158,7 @@ class LatLonPointDistanceComparator extends FieldComparator implements L LeafReader reader = context.reader(); FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info != null) { - LatLonPoint.checkCompatible(info); + LatLonDocValuesField.checkCompatible(info); } currentDocs = DocValues.getSortedNumeric(reader, field); return this; diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java new file mode 100644 index 00000000000..df934d13406 --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import org.apache.lucene.util.LuceneTestCase; + +/** Simple tests for LatLonDocValuesField */ +public class TestLatLonDocValuesField extends LuceneTestCase { + public void testToString() throws Exception { + // looks crazy due to lossiness + assertEquals("LatLonDocValuesField ",(new LatLonDocValuesField("field", 18.313694, -65.227444)).toString()); + + // sort field + assertEquals("", LatLonDocValuesField.newDistanceSort("field", 18.0, 19.0).toString()); + } +} diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java index acc9186b14f..700eb56e459 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java @@ -33,8 +33,5 @@ public class TestLatLonPoint extends LuceneTestCase { // distance query does not quantize inputs assertEquals("field:18.0,19.0 +/- 25.0 meters", LatLonPoint.newDistanceQuery("field", 18, 19, 25).toString()); - - // sort field - assertEquals("", LatLonPoint.newDistanceSort("field", 18.0, 19.0).toString()); } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java index e40f33d864b..6d825d2b76c 
100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java @@ -40,7 +40,7 @@ import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; -/** Simple tests for {@link LatLonPoint#newDistanceSort} */ +/** Simple tests for {@link LatLonDocValuesField#newDistanceSort} */ public class TestLatLonPointDistanceSort extends LuceneTestCase { /** Add three points and sort by distance */ @@ -50,22 +50,22 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { // add some docs Document doc = new Document(); - doc.add(new LatLonPoint("location", 40.759011, -73.9844722)); + doc.add(new LatLonDocValuesField("location", 40.759011, -73.9844722)); iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819)); iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305)); iw.addDocument(doc); IndexReader reader = iw.getReader(); IndexSearcher searcher = newSearcher(reader); iw.close(); - Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731)); + Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731)); TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); FieldDoc d = (FieldDoc) td.scoreDocs[0]; @@ -91,18 +91,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819)); iw.addDocument(doc); doc = new 
Document(); - doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305)); iw.addDocument(doc); IndexReader reader = iw.getReader(); IndexSearcher searcher = newSearcher(reader); iw.close(); - Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731)); + Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731)); TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); FieldDoc d = (FieldDoc) td.scoreDocs[0]; @@ -199,7 +199,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { double lat = decodeLatitude(encodeLatitude(latRaw)); double lon = decodeLongitude(encodeLongitude(lonRaw)); - doc.add(new LatLonPoint("field", lat, lon)); + doc.add(new LatLonDocValuesField("field", lat, lon)); doc.add(new StoredField("lat", lat)); doc.add(new StoredField("lon", lon)); } // otherwise "missing" @@ -234,7 +234,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { // randomize the topN a bit int topN = TestUtil.nextInt(random(), 1, reader.maxDoc()); // sort by distance, then ID - SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon); + SortField distanceSort = LatLonDocValuesField.newDistanceSort("field", lat, lon); distanceSort.setMissingValue(missingValue); Sort sort = new Sort(distanceSort, new SortField("id", SortField.Type.INT)); diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java index fc073c7b572..66630df2bca 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java @@ -166,6 +166,7 @@ public class TestNearest extends LuceneTestCase { lons[id] = quantizeLon(GeoTestUtil.nextLongitude()); Document doc = new Document(); doc.add(new LatLonPoint("point", lats[id], lons[id])); + 
doc.add(new LatLonDocValuesField("point", lats[id], lons[id])); doc.add(new StoredField("id", id)); w.addDocument(doc); } @@ -216,7 +217,7 @@ public class TestNearest extends LuceneTestCase { } // Also test with MatchAllDocsQuery, sorting by distance: - TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonPoint.newDistanceSort("point", pointLat, pointLon))); + TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon))); ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs; for(int i=0;i Date: Sun, 24 Apr 2016 18:47:50 -0400 Subject: [PATCH 11/18] implement grow() for polygon queries too: easy speedup. --- .../apache/lucene/document/LatLonPointInPolygonQuery.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index 23a98d22c77..e68cb4579c6 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -120,6 +120,11 @@ final class LatLonPointInPolygonQuery extends Query { values.intersect(field, new IntersectVisitor() { + @Override + public void grow(int count) { + result.grow(count); + } + @Override public void visit(int docID) { result.add(docID); From e3e9114921da4b208960b1da980a7c8c1ba7b1f7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 24 Apr 2016 20:09:05 -0400 Subject: [PATCH 12/18] implement grow() for spatial3d intersector: easy speedup --- .../lucene/spatial3d/PointInShapeIntersectVisitor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java 
b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java index d4e730999dc..cf94c35b2c9 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInShapeIntersectVisitor.java @@ -38,6 +38,11 @@ class PointInShapeIntersectVisitor implements IntersectVisitor { this.shapeBounds = shapeBounds; } + @Override + public void grow(int count) { + hits.grow(count); + } + @Override public void visit(int docID) { hits.add(docID); From fd7b2159d8cc5d814c4d7f47f64945d8f4f5426f Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Mon, 25 Apr 2016 09:39:29 +0100 Subject: [PATCH 13/18] LUCENE-7247: TestCoreParser.dumpResults verbose and test-fail logging tweaks --- .../apache/lucene/queryparser/xml/TestCoreParser.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java index 4cf5fcffe2f..f252600d954 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java @@ -221,13 +221,15 @@ public class TestCoreParser extends LuceneTestCase { protected void dumpResults(String qType, Query q, int numDocs) throws IOException { if (VERBOSE) { - System.out.println("TEST: qType=" + qType + " query=" + q + " numDocs=" + numDocs); + System.out.println("TEST: qType=" + qType + " numDocs=" + numDocs + " " + q.getClass().getCanonicalName() + " query=" + q); } final IndexSearcher searcher = searcher(); TopDocs hits = searcher.search(q, numDocs); - assertTrue(qType + " should produce results ", hits.totalHits > 0); + final boolean producedResults = (hits.totalHits > 0); + if (!producedResults) { + System.out.println("TEST: qType=" + qType + " numDocs=" + 
numDocs + " " + q.getClass().getCanonicalName() + " query=" + q); + } if (VERBOSE) { - System.out.println("=========" + qType + "============"); ScoreDoc[] scoreDocs = hits.scoreDocs; for (int i = 0; i < Math.min(numDocs, hits.totalHits); i++) { Document ldoc = searcher.doc(scoreDocs[i].doc); @@ -235,5 +237,6 @@ public class TestCoreParser extends LuceneTestCase { } System.out.println(); } + assertTrue(qType + " produced no results", producedResults); } } From fe795c9f7a5e936fe7ed6dd33e5d39105624683e Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Mon, 25 Apr 2016 11:14:39 -0400 Subject: [PATCH 14/18] fix stale javadocs --- .../core/src/java/org/apache/lucene/document/StringField.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/StringField.java b/lucene/core/src/java/org/apache/lucene/document/StringField.java index b3c7fe0106b..7b968b67f72 100644 --- a/lucene/core/src/java/org/apache/lucene/document/StringField.java +++ b/lucene/core/src/java/org/apache/lucene/document/StringField.java @@ -23,8 +23,8 @@ import org.apache.lucene.util.BytesRef; /** A field that is indexed but not tokenized: the entire * String value is indexed as a single token. For example * this might be used for a 'country' field or an 'id' - * field, or any field that you intend to use for sorting - * or access through the field cache. */ + * field. If you also need to sort on this field, separately + * add a {@link SortedDocValuesField} to your document. 
*/ public final class StringField extends Field { From 837264a42ebe3e6091a64b2d0410ee7c63eebb1f Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 25 Apr 2016 15:29:54 -0400 Subject: [PATCH 15/18] LUCENE-7251: remove LatLonGrid, remove slow polygon methods, speed up multiple components --- .../java/org/apache/lucene/geo/Polygon.java | 221 ------------ .../org/apache/lucene/geo/Polygon2D.java} | 195 +++++++---- .../org/apache/lucene/geo/TestPolygon.java | 330 ------------------ .../org/apache/lucene/geo/TestPolygon2D.java | 289 +++++++++++++++ .../apache/lucene/document/LatLonGrid.java | 168 --------- .../document/LatLonPointInPolygonQuery.java | 22 +- .../lucene/document/TestLatLonGrid.java | 106 ------ .../lucene/document/TestLatLonTree.java | 53 --- .../search/GeoPointInPolygonQueryImpl.java | 16 +- .../lucene/geo/BaseGeoPointTestCase.java | 2 +- .../org/apache/lucene/geo/GeoTestUtil.java | 54 +++ 11 files changed, 493 insertions(+), 963 deletions(-) rename lucene/{sandbox/src/java/org/apache/lucene/document/LatLonTree.java => core/src/java/org/apache/lucene/geo/Polygon2D.java} (72%) create mode 100644 lucene/core/src/test/org/apache/lucene/geo/TestPolygon2D.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java delete mode 100644 lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java delete mode 100644 lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonTree.java diff --git a/lucene/core/src/java/org/apache/lucene/geo/Polygon.java b/lucene/core/src/java/org/apache/lucene/geo/Polygon.java index 361c199fc31..3b5dec9ca2d 100644 --- a/lucene/core/src/java/org/apache/lucene/geo/Polygon.java +++ b/lucene/core/src/java/org/apache/lucene/geo/Polygon.java @@ -18,8 +18,6 @@ package org.apache.lucene.geo; import java.util.Arrays; -import org.apache.lucene.index.PointValues.Relation; - /** * Represents a closed polygon on the earth's surface. *

@@ -48,8 +46,6 @@ public final class Polygon { /** maximum longitude of this polygon's bounding box area */ public final double maxLon; - // TODO: we could also compute the maximal inner bounding box, to make relations faster to compute? - /** * Creates a new Polygon from the supplied latitude/longitude array, and optionally any holes. */ @@ -110,200 +106,6 @@ public final class Polygon { this.maxLon = maxLon; } - /** - * Returns true if the point is contained within this polygon. - *

- * See - * https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html for more information. - */ - // ported to java from https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html - // original code under the BSD license (https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html#License%20to%20Use) - // - // Copyright (c) 1970-2003, Wm. Randolph Franklin - // - // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated - // documentation files (the "Software"), to deal in the Software without restriction, including without limitation - // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and - // to permit persons to whom the Software is furnished to do so, subject to the following conditions: - // - // 1. Redistributions of source code must retain the above copyright - // notice, this list of conditions and the following disclaimers. - // 2. Redistributions in binary form must reproduce the above copyright - // notice in the documentation and/or other materials provided with - // the distribution. - // 3. The name of W. Randolph Franklin may not be used to endorse or - // promote products derived from this Software without specific - // prior written permission. - // - // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - // CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - // IN THE SOFTWARE. 
- public boolean contains(double latitude, double longitude) { - // check bounding box - if (latitude < minLat || latitude > maxLat || longitude < minLon || longitude > maxLon) { - return false; - } - - boolean inPoly = false; - boolean previous = polyLats[0] > latitude; - for (int i = 1; i < polyLats.length; i++) { - boolean current = polyLats[i] > latitude; - if (current != previous) { - if (longitude < (polyLons[i-1] - polyLons[i]) * (latitude - polyLats[i]) / (polyLats[i-1] - polyLats[i]) + polyLons[i]) { - inPoly = !inPoly; - } - previous = current; - } - } - if (inPoly) { - for (Polygon hole : holes) { - if (hole.contains(latitude, longitude)) { - return false; - } - } - return true; - } else { - return false; - } - } - - /** Returns relation to the provided rectangle */ - public Relation relate(double minLat, double maxLat, double minLon, double maxLon) { - // if the bounding boxes are disjoint then the shape does not cross - if (maxLon < this.minLon || minLon > this.maxLon || maxLat < this.minLat || minLat > this.maxLat) { - return Relation.CELL_OUTSIDE_QUERY; - } - // if the rectangle fully encloses us, we cross. 
- if (minLat <= this.minLat && maxLat >= this.maxLat && minLon <= this.minLon && maxLon >= this.maxLon) { - return Relation.CELL_CROSSES_QUERY; - } - // check any holes - for (Polygon hole : holes) { - Relation holeRelation = hole.relate(minLat, maxLat, minLon, maxLon); - if (holeRelation == Relation.CELL_CROSSES_QUERY) { - return Relation.CELL_CROSSES_QUERY; - } else if (holeRelation == Relation.CELL_INSIDE_QUERY) { - return Relation.CELL_OUTSIDE_QUERY; - } - } - // check each corner: if < 4 are present, its cheaper than crossesSlowly - int numCorners = numberOfCorners(minLat, maxLat, minLon, maxLon); - if (numCorners == 4) { - if (crossesSlowly(minLat, maxLat, minLon, maxLon)) { - return Relation.CELL_CROSSES_QUERY; - } - return Relation.CELL_INSIDE_QUERY; - } else if (numCorners > 0) { - return Relation.CELL_CROSSES_QUERY; - } - - // we cross - if (crossesSlowly(minLat, maxLat, minLon, maxLon)) { - return Relation.CELL_CROSSES_QUERY; - } - - return Relation.CELL_OUTSIDE_QUERY; - } - - // returns 0, 4, or something in between - private int numberOfCorners(double minLat, double maxLat, double minLon, double maxLon) { - int containsCount = 0; - if (contains(minLat, minLon)) { - containsCount++; - } - if (contains(minLat, maxLon)) { - containsCount++; - } - if (containsCount == 1) { - return containsCount; - } - if (contains(maxLat, maxLon)) { - containsCount++; - } - if (containsCount == 2) { - return containsCount; - } - if (contains(maxLat, minLon)) { - containsCount++; - } - return containsCount; - } - - /** Returns true if the box crosses our polygon */ - private boolean crossesSlowly(double minLat, double maxLat, double minLon, double maxLon) { - // we compute line intersections of every polygon edge with every box line. - // if we find one, return true. 
- // for each box line (AB): - // for each poly line (CD): - // intersects = orient(C,D,A) * orient(C,D,B) <= 0 && orient(A,B,C) * orient(A,B,D) <= 0 - for (int i = 1; i < polyLons.length; i++) { - double cy = polyLats[i - 1]; - double dy = polyLats[i]; - double cx = polyLons[i - 1]; - double dx = polyLons[i]; - - // optimization: see if the rectangle is outside of the "bounding box" of the polyline at all - // if not, don't waste our time trying more complicated stuff - if ((cy < minLat && dy < minLat) || - (cy > maxLat && dy > maxLat) || - (cx < minLon && dx < minLon) || - (cx > maxLon && dx > maxLon)) { - continue; - } - - // does box's top edge intersect polyline? - // ax = minLon, bx = maxLon, ay = maxLat, by = maxLat - if (orient(cx, cy, dx, dy, minLon, maxLat) * orient(cx, cy, dx, dy, maxLon, maxLat) <= 0 && - orient(minLon, maxLat, maxLon, maxLat, cx, cy) * orient(minLon, maxLat, maxLon, maxLat, dx, dy) <= 0) { - return true; - } - - // does box's right edge intersect polyline? - // ax = maxLon, bx = maxLon, ay = maxLat, by = minLat - if (orient(cx, cy, dx, dy, maxLon, maxLat) * orient(cx, cy, dx, dy, maxLon, minLat) <= 0 && - orient(maxLon, maxLat, maxLon, minLat, cx, cy) * orient(maxLon, maxLat, maxLon, minLat, dx, dy) <= 0) { - return true; - } - - // does box's bottom edge intersect polyline? - // ax = maxLon, bx = minLon, ay = minLat, by = minLat - if (orient(cx, cy, dx, dy, maxLon, minLat) * orient(cx, cy, dx, dy, minLon, minLat) <= 0 && - orient(maxLon, minLat, minLon, minLat, cx, cy) * orient(maxLon, minLat, minLon, minLat, dx, dy) <= 0) { - return true; - } - - // does box's left edge intersect polyline? 
- // ax = minLon, bx = minLon, ay = minLat, by = maxLat - if (orient(cx, cy, dx, dy, minLon, minLat) * orient(cx, cy, dx, dy, minLon, maxLat) <= 0 && - orient(minLon, minLat, minLon, maxLat, cx, cy) * orient(minLon, minLat, minLon, maxLat, dx, dy) <= 0) { - return true; - } - } - return false; - } - - /** - * Returns a positive value if points a, b, and c are arranged in counter-clockwise order, - * negative value if clockwise, zero if collinear. - */ - // see the "Orient2D" method described here: - // http://www.cs.berkeley.edu/~jrs/meshpapers/robnotes.pdf - // https://www.cs.cmu.edu/~quake/robust.html - // Note that this one does not yet have the floating point tricks to be exact! - private static int orient(double ax, double ay, double bx, double by, double cx, double cy) { - double v1 = (bx - ax) * (cy - ay); - double v2 = (cx - ax) * (by - ay); - if (v1 > v2) { - return 1; - } else if (v1 < v2) { - return -1; - } else { - return 0; - } - } - /** Returns a copy of the internal latitude array */ public double[] getPolyLats() { return polyLats.clone(); @@ -319,29 +121,6 @@ public final class Polygon { return holes.clone(); } - /** Helper for multipolygon logic: returns true if any of the supplied polygons contain the point */ - public static boolean contains(Polygon[] polygons, double latitude, double longitude) { - for (Polygon polygon : polygons) { - if (polygon.contains(latitude, longitude)) { - return true; - } - } - return false; - } - - /** Returns the multipolygon relation for the rectangle */ - public static Relation relate(Polygon[] polygons, double minLat, double maxLat, double minLon, double maxLon) { - for (Polygon polygon : polygons) { - Relation relation = polygon.relate(minLat, maxLat, minLon, maxLon); - if (relation != Relation.CELL_OUTSIDE_QUERY) { - // note: we optimize for non-overlapping multipolygons. so if we cross one, - // we won't keep iterating to try to find a contains. 
- return relation; - } - } - return Relation.CELL_OUTSIDE_QUERY; - } - @Override public int hashCode() { final int prime = 31; diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java b/lucene/core/src/java/org/apache/lucene/geo/Polygon2D.java similarity index 72% rename from lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java rename to lucene/core/src/java/org/apache/lucene/geo/Polygon2D.java index f7a2927e2fb..320d71a2284 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonTree.java +++ b/lucene/core/src/java/org/apache/lucene/geo/Polygon2D.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.document; +package org.apache.lucene.geo; import java.util.Arrays; @@ -24,38 +24,55 @@ import org.apache.lucene.index.PointValues.Relation; /** * 2D polygon implementation represented as a balanced interval tree of edges. *

- * contains() and crosses() are still O(n), but for most practical polygons - * are much faster than brute force. + * Construction takes {@code O(n log n)} time for sorting and tree construction. + * {@link #contains contains()} and {@link #relate relate()} are {@code O(n)}, but for most + * practical polygons are much faster than brute force. *

* Loosely based on the algorithm described in * http://www-ma2.upc.es/geoc/Schirra-pointPolygon.pdf. + * @lucene.internal */ // Both Polygon.contains() and Polygon.crossesSlowly() loop all edges, and first check that the edge is within a range. // we just organize the edges to do the same computations on the same subset of edges more efficiently. -// TODO: clean this up, call it Polygon2D, and remove all the 2D methods from Polygon? -final class LatLonTree { - private final LatLonTree[] holes; - +public final class Polygon2D { /** minimum latitude of this polygon's bounding box area */ - final double minLat; + public final double minLat; /** maximum latitude of this polygon's bounding box area */ - final double maxLat; + public final double maxLat; /** minimum longitude of this polygon's bounding box area */ - final double minLon; + public final double minLon; /** maximum longitude of this polygon's bounding box area */ - final double maxLon; + public final double maxLon; - /** root node of our tree */ - final Edge tree; + // each component/hole is a node in an augmented 2d kd-tree: we alternate splitting between latitude/longitude, + // and pull up max values for both dimensions to each parent node (regardless of split). - // TODO: "pack" all the gons and holes into one tree with separator. - // the algorithms support this, but we have to be careful. - LatLonTree(Polygon polygon, LatLonTree... 
holes) { - this.holes = holes.clone(); + /** maximum latitude of this component or any of its children */ + private double maxY; + /** maximum longitude of this component or any of its children */ + private double maxX; + /** which dimension was this node split on */ + // TODO: its implicit based on level, but boolean keeps code simple + private boolean splitX; + + // child components, or null + private Polygon2D left; + private Polygon2D right; + + /** tree of holes, or null */ + private final Polygon2D holes; + + /** root node of edge tree */ + private final Edge tree; + + private Polygon2D(Polygon polygon, Polygon2D holes) { + this.holes = holes; this.minLat = polygon.minLat; this.maxLat = polygon.maxLat; this.minLon = polygon.minLon; this.maxLon = polygon.maxLon; + this.maxY = maxLat; + this.maxX = maxLon; // create interval tree of edges this.tree = createTree(polygon.getPolyLats(), polygon.getPolyLons()); @@ -67,17 +84,35 @@ final class LatLonTree { * See * https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html for more information. */ - boolean contains(double latitude, double longitude) { + public boolean contains(double latitude, double longitude) { + if (latitude <= maxY && longitude <= maxX) { + if (componentContains(latitude, longitude)) { + return true; + } + if (left != null) { + if (left.contains(latitude, longitude)) { + return true; + } + } + if (right != null && ((splitX == false && latitude >= minLat) || (splitX && longitude >= minLon))) { + if (right.contains(latitude, longitude)) { + return true; + } + } + } + return false; + } + + /** Returns true if the point is contained within this polygon component. 
*/ + private boolean componentContains(double latitude, double longitude) { // check bounding box if (latitude < minLat || latitude > maxLat || longitude < minLon || longitude > maxLon) { return false; } if (tree.contains(latitude, longitude)) { - for (LatLonTree hole : holes) { - if (hole.contains(latitude, longitude)) { - return false; - } + if (holes != null && holes.contains(latitude, longitude)) { + return false; } return true; } @@ -86,7 +121,30 @@ final class LatLonTree { } /** Returns relation to the provided rectangle */ - Relation relate(double minLat, double maxLat, double minLon, double maxLon) { + public Relation relate(double minLat, double maxLat, double minLon, double maxLon) { + if (minLat <= maxY && minLon <= maxX) { + Relation relation = componentRelate(minLat, maxLat, minLon, maxLon); + if (relation != Relation.CELL_OUTSIDE_QUERY) { + return relation; + } + if (left != null) { + relation = left.relate(minLat, maxLat, minLon, maxLon); + if (relation != Relation.CELL_OUTSIDE_QUERY) { + return relation; + } + } + if (right != null && ((splitX == false && maxLat >= this.minLat) || (splitX && maxLon >= this.minLon))) { + relation = right.relate(minLat, maxLat, minLon, maxLon); + if (relation != Relation.CELL_OUTSIDE_QUERY) { + return relation; + } + } + } + return Relation.CELL_OUTSIDE_QUERY; + } + + /** Returns relation to the provided rectangle for this component */ + private Relation componentRelate(double minLat, double maxLat, double minLon, double maxLon) { // if the bounding boxes are disjoint then the shape does not cross if (maxLon < this.minLon || minLon > this.maxLon || maxLat < this.minLat || minLat > this.maxLat) { return Relation.CELL_OUTSIDE_QUERY; @@ -96,8 +154,8 @@ final class LatLonTree { return Relation.CELL_CROSSES_QUERY; } // check any holes - for (LatLonTree hole : holes) { - Relation holeRelation = hole.relate(minLat, maxLat, minLon, maxLon); + if (holes != null) { + Relation holeRelation = holes.relate(minLat, maxLat, minLon, 
maxLon); if (holeRelation == Relation.CELL_CROSSES_QUERY) { return Relation.CELL_CROSSES_QUERY; } else if (holeRelation == Relation.CELL_INSIDE_QUERY) { @@ -126,66 +184,85 @@ final class LatLonTree { // returns 0, 4, or something in between private int numberOfCorners(double minLat, double maxLat, double minLon, double maxLon) { int containsCount = 0; - if (contains(minLat, minLon)) { + if (componentContains(minLat, minLon)) { containsCount++; } - if (contains(minLat, maxLon)) { + if (componentContains(minLat, maxLon)) { containsCount++; } if (containsCount == 1) { return containsCount; } - if (contains(maxLat, maxLon)) { + if (componentContains(maxLat, maxLon)) { containsCount++; } if (containsCount == 2) { return containsCount; } - if (contains(maxLat, minLon)) { + if (componentContains(maxLat, minLon)) { containsCount++; } return containsCount; } - - /** Helper for multipolygon logic: returns true if any of the supplied polygons contain the point */ - static boolean contains(LatLonTree[] polygons, double latitude, double longitude) { - for (LatLonTree polygon : polygons) { - if (polygon.contains(latitude, longitude)) { - return true; + + /** Creates tree from sorted components (with range low and high inclusive) */ + private static Polygon2D createTree(Polygon2D components[], int low, int high, boolean splitX) { + if (low > high) { + return null; + } else if (low < high) { + // TODO: do one sort instead! there are better algorithms! 
+ if (splitX) { + Arrays.sort(components, low, high+1, (left, right) -> { + int ret = Double.compare(left.minLon, right.minLon); + if (ret == 0) { + ret = Double.compare(left.maxX, right.maxX); + } + return ret; + }); + } else { + Arrays.sort(components, low, high+1, (left, right) -> { + int ret = Double.compare(left.minLat, right.minLat); + if (ret == 0) { + ret = Double.compare(left.maxY, right.maxY); + } + return ret; + }); } } - return false; - } - - /** Returns the multipolygon relation for the rectangle */ - static Relation relate(LatLonTree[] polygons, double minLat, double maxLat, double minLon, double maxLon) { - for (LatLonTree polygon : polygons) { - Relation relation = polygon.relate(minLat, maxLat, minLon, maxLon); - if (relation != Relation.CELL_OUTSIDE_QUERY) { - // note: we optimize for non-overlapping multipolygons. so if we cross one, - // we won't keep iterating to try to find a contains. - return relation; - } + // add midpoint + int mid = (low + high) >>> 1; + Polygon2D newNode = components[mid]; + newNode.splitX = splitX; + // add children + newNode.left = createTree(components, low, mid - 1, !splitX); + newNode.right = createTree(components, mid + 1, high, !splitX); + // pull up max values to this node + if (newNode.left != null) { + newNode.maxX = Math.max(newNode.maxX, newNode.left.maxX); + newNode.maxY = Math.max(newNode.maxY, newNode.left.maxY); } - return Relation.CELL_OUTSIDE_QUERY; + if (newNode.right != null) { + newNode.maxX = Math.max(newNode.maxX, newNode.right.maxX); + newNode.maxY = Math.max(newNode.maxY, newNode.right.maxY); + } + return newNode; } - /** Builds a tree from multipolygon */ - static LatLonTree[] build(Polygon... polygons) { - // TODO: use one tree with separators (carefully!) - LatLonTree trees[] = new LatLonTree[polygons.length]; - for (int i = 0; i < trees.length; i++) { + /** Builds a Polygon2D from multipolygon */ + public static Polygon2D create(Polygon... 
polygons) { + Polygon2D components[] = new Polygon2D[polygons.length]; + for (int i = 0; i < components.length; i++) { Polygon gon = polygons[i]; Polygon gonHoles[] = gon.getHoles(); - LatLonTree holes[] = new LatLonTree[gonHoles.length]; - for (int j = 0; j < holes.length; j++) { - holes[j] = new LatLonTree(gonHoles[j]); + Polygon2D holes = null; + if (gonHoles.length > 0) { + holes = create(gonHoles); } - trees[i] = new LatLonTree(gon, holes); + components[i] = new Polygon2D(gon, holes); } - return trees; + return createTree(components, 0, components.length - 1, false); } - + /** * Internal tree node: represents polygon edge from lat1,lon1 to lat2,lon2. * The sort value is {@code low}, which is the minimum latitude of the edge. diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestPolygon.java b/lucene/core/src/test/org/apache/lucene/geo/TestPolygon.java index 12c36900443..401092f5d50 100644 --- a/lucene/core/src/test/org/apache/lucene/geo/TestPolygon.java +++ b/lucene/core/src/test/org/apache/lucene/geo/TestPolygon.java @@ -16,17 +16,8 @@ */ package org.apache.lucene.geo; -import org.apache.lucene.geo.Polygon; -import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.util.LuceneTestCase; -import static org.apache.lucene.geo.GeoTestUtil.nextLatitude; -import static org.apache.lucene.geo.GeoTestUtil.nextLongitude; -import static org.apache.lucene.geo.GeoTestUtil.nextPolygon; - -import java.util.ArrayList; -import java.util.List; - public class TestPolygon extends LuceneTestCase { /** null polyLats not allowed */ @@ -68,325 +59,4 @@ public class TestPolygon extends LuceneTestCase { }); assertTrue(expected.getMessage(), expected.getMessage().contains("it must close itself")); } - - /** Three boxes, an island inside a hole inside a shape */ - public void testMultiPolygon() { - Polygon hole = new Polygon(new double[] { -10, -10, 10, 10, -10 }, new double[] { -10, 10, 10, -10, -10 }); - Polygon outer = new Polygon(new double[] { -50, -50, 50, 
50, -50 }, new double[] { -50, 50, 50, -50, -50 }, hole); - Polygon island = new Polygon(new double[] { -5, -5, 5, 5, -5 }, new double[] { -5, 5, 5, -5, -5 } ); - Polygon polygons[] = new Polygon[] { outer, island }; - - // contains(point) - assertTrue(Polygon.contains(polygons, -2, 2)); // on the island - assertFalse(Polygon.contains(polygons, -6, 6)); // in the hole - assertTrue(Polygon.contains(polygons, -25, 25)); // on the mainland - assertFalse(Polygon.contains(polygons, -51, 51)); // in the ocean - - // relate(box): this can conservatively return CELL_CROSSES_QUERY - assertEquals(Relation.CELL_INSIDE_QUERY, Polygon.relate(polygons, -2, 2, -2, 2)); // on the island - assertEquals(Relation.CELL_OUTSIDE_QUERY, Polygon.relate(polygons, 6, 7, 6, 7)); // in the hole - assertEquals(Relation.CELL_INSIDE_QUERY, Polygon.relate(polygons, 24, 25, 24, 25)); // on the mainland - assertEquals(Relation.CELL_OUTSIDE_QUERY, Polygon.relate(polygons, 51, 52, 51, 52)); // in the ocean - assertEquals(Relation.CELL_CROSSES_QUERY, Polygon.relate(polygons, -60, 60, -60, 60)); // enclosing us completely - assertEquals(Relation.CELL_CROSSES_QUERY, Polygon.relate(polygons, 49, 51, 49, 51)); // overlapping the mainland - assertEquals(Relation.CELL_CROSSES_QUERY, Polygon.relate(polygons, 9, 11, 9, 11)); // overlapping the hole - assertEquals(Relation.CELL_CROSSES_QUERY, Polygon.relate(polygons, 5, 6, 5, 6)); // overlapping the island - } - - public void testPacMan() throws Exception { - // pacman - double[] px = {0, 10, 10, 0, -8, -10, -8, 0, 10, 10, 0}; - double[] py = {0, 5, 9, 10, 9, 0, -9, -10, -9, -5, 0}; - - // candidate crosses cell - double xMin = 2;//-5; - double xMax = 11;//0.000001; - double yMin = -1;//0; - double yMax = 1;//5; - - // test cell crossing poly - Polygon polygon = new Polygon(py, px); - assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(yMin, yMax, xMin, xMax)); - } - - public void testBoundingBox() throws Exception { - for (int i = 0; i < 100; i++) { - 
Polygon polygon = nextPolygon(); - - for (int j = 0; j < 100; j++) { - double latitude = nextLatitude(); - double longitude = nextLongitude(); - // if the point is within poly, then it should be in our bounding box - if (polygon.contains(latitude, longitude)) { - assertTrue(latitude >= polygon.minLat && latitude <= polygon.maxLat); - assertTrue(longitude >= polygon.minLon && longitude <= polygon.maxLon); - } - } - } - } - - // targets the bounding box directly - public void testBoundingBoxEdgeCases() throws Exception { - for (int i = 0; i < 100; i++) { - Polygon polygon = nextPolygon(); - - for (int j = 0; j < 100; j++) { - double point[] = GeoTestUtil.nextPointNear(polygon); - double latitude = point[0]; - double longitude = point[1]; - // if the point is within poly, then it should be in our bounding box - if (polygon.contains(latitude, longitude)) { - assertTrue(latitude >= polygon.minLat && latitude <= polygon.maxLat); - assertTrue(longitude >= polygon.minLon && longitude <= polygon.maxLon); - } - } - } - } - - /** If polygon.contains(box) returns true, then any point in that box should return true as well */ - public void testContainsRandom() throws Exception { - for (int i = 0; i < 1000; i++) { - Polygon polygon = nextPolygon(); - - for (int j = 0; j < 100; j++) { - Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); - // allowed to conservatively return false - if (polygon.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_INSIDE_QUERY) { - for (int k = 0; k < 500; k++) { - // this tests in our range but sometimes outside! 
so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(rectangle); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertTrue(polygon.contains(latitude, longitude)); - } - } - for (int k = 0; k < 100; k++) { - // this tests in our range but sometimes outside! so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(polygon); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertTrue(polygon.contains(latitude, longitude)); - } - } - } - } - } - } - - /** If polygon.contains(box) returns true, then any point in that box should return true as well */ - // different from testContainsRandom in that its not a purely random test. we iterate the vertices of the polygon - // and generate boxes near each one of those to try to be more efficient. - public void testContainsEdgeCases() throws Exception { - for (int i = 0; i < 1000; i++) { - Polygon polygon = nextPolygon(); - - for (int j = 0; j < 10; j++) { - Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); - // allowed to conservatively return false - if (polygon.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_INSIDE_QUERY) { - for (int k = 0; k < 100; k++) { - // this tests in our range but sometimes outside! 
so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(rectangle); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertTrue(polygon.contains(latitude, longitude)); - } - } - for (int k = 0; k < 20; k++) { - // this tests in our range but sometimes outside! so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(polygon); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertTrue(polygon.contains(latitude, longitude)); - } - } - } - } - } - } - - /** If polygon.intersects(box) returns false, then any point in that box should return false as well */ - public void testIntersectRandom() { - for (int i = 0; i < 100; i++) { - Polygon polygon = nextPolygon(); - - for (int j = 0; j < 100; j++) { - Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); - // allowed to conservatively return true. - if (polygon.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_OUTSIDE_QUERY) { - for (int k = 0; k < 1000; k++) { - double point[] = GeoTestUtil.nextPointNear(rectangle); - // this tests in our range but sometimes outside! 
so we have to double-check its really in other box - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertFalse(polygon.contains(latitude, longitude)); - } - } - for (int k = 0; k < 100; k++) { - double point[] = GeoTestUtil.nextPointNear(polygon); - // this tests in our range but sometimes outside! so we have to double-check its really in other box - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertFalse(polygon.contains(latitude, longitude)); - } - } - } - } - } - } - - /** If polygon.intersects(box) returns false, then any point in that box should return false as well */ - // different from testIntersectsRandom in that its not a purely random test. we iterate the vertices of the polygon - // and generate boxes near each one of those to try to be more efficient. - public void testIntersectEdgeCases() { - for (int i = 0; i < 100; i++) { - Polygon polygon = nextPolygon(); - - for (int j = 0; j < 10; j++) { - Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); - // allowed to conservatively return false. - if (polygon.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_OUTSIDE_QUERY) { - for (int k = 0; k < 100; k++) { - // this tests in our range but sometimes outside! 
so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(rectangle); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertFalse(polygon.contains(latitude, longitude)); - } - } - for (int k = 0; k < 50; k++) { - // this tests in our range but sometimes outside! so we have to double-check its really in other box - double point[] = GeoTestUtil.nextPointNear(polygon); - double latitude = point[0]; - double longitude = point[1]; - // check for sure its in our box - if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { - assertFalse(polygon.contains(latitude, longitude)); - } - } - } - } - } - } - - /** Tests edge case behavior with respect to insideness */ - public void testEdgeInsideness() { - Polygon poly = new Polygon(new double[] { -2, -2, 2, 2, -2 }, new double[] { -2, 2, 2, -2, -2 }); - assertTrue(poly.contains(-2, -2)); // bottom left corner: true - assertFalse(poly.contains(-2, 2)); // bottom right corner: false - assertFalse(poly.contains(2, -2)); // top left corner: false - assertFalse(poly.contains(2, 2)); // top right corner: false - assertTrue(poly.contains(-2, -1)); // bottom side: true - assertTrue(poly.contains(-2, 0)); // bottom side: true - assertTrue(poly.contains(-2, 1)); // bottom side: true - assertFalse(poly.contains(2, -1)); // top side: false - assertFalse(poly.contains(2, 0)); // top side: false - assertFalse(poly.contains(2, 1)); // top side: false - assertFalse(poly.contains(-1, 2)); // right side: false - assertFalse(poly.contains(0, 2)); // right side: false - assertFalse(poly.contains(1, 2)); // right side: false - assertTrue(poly.contains(-1, -2)); // left side: true - assertTrue(poly.contains(0, -2)); // left side: true - 
assertTrue(poly.contains(1, -2)); // left side: true - } - - /** Tests that our impl supports multiple components and holes (not currently used) */ - public void testMultiPolygonContains() { - // this is the equivalent of the following: we don't recommend anyone do this (e.g. relation logic will not work) - // but lets not lose the property that it works. - /// - // Polygon hole = new Polygon(new double[] { -10, -10, 10, 10, -10 }, new double[] { -10, 10, 10, -10, -10 }); - // Polygon outer = new Polygon(new double[] { -50, -50, 50, 50, -50 }, new double[] { -50, 50, 50, -50, -50 }, hole); - // Polygon island = new Polygon(new double[] { -5, -5, 5, 5, -5 }, new double[] { -5, 5, 5, -5, -5 } ); - // Polygon polygons[] = new Polygon[] { outer, island }; - - Polygon polygon = new Polygon(new double[] { 0, -50, -50, 50, 50, -50, 0, -5, -5, 5, 5, -5, 0, -10, -10, 10, 10, -10, 0 }, - new double[] { 0, -50, 50, 50, -50, -50, 0, -5, 5, 5, -5, -5, 0, -10, 10, 10, -10, -10, 0 }); - - assertTrue(polygon.contains(-2, 2)); // on the island - assertFalse(polygon.contains(-6, 6)); // in the hole - assertTrue(polygon.contains(-25, 25)); // on the mainland - assertFalse(polygon.contains(-51, 51)); // in the ocean - } - - /** Tests current impl against original algorithm */ - public void testContainsAgainstOriginal() { - for (int i = 0; i < 1000; i++) { - Polygon polygon = nextPolygon(); - // currently we don't generate these, but this test does not want holes. 
- while (polygon.getHoles().length > 0) { - polygon = nextPolygon(); - } - - double polyLats[] = polygon.getPolyLats(); - double polyLons[] = polygon.getPolyLons(); - - // random lat/lons against polygon - for (int j = 0; j < 1000; j++) { - double point[] = GeoTestUtil.nextPointNear(polygon); - double latitude = point[0]; - double longitude = point[1]; - // bounding box check required due to rounding errors (we don't solve that problem) - if (latitude >= polygon.minLat && latitude <= polygon.maxLat && longitude >= polygon.minLon && longitude <= polygon.maxLon) { - boolean expected = containsOriginal(polyLats, polyLons, latitude, longitude); - assertEquals(expected, polygon.contains(latitude, longitude)); - } - } - } - } - - // direct port of PNPOLY C code (https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html) - // this allows us to improve the code yet still ensure we have its properties - // it is under the BSD license (https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html#License%20to%20Use) - // - // Copyright (c) 1970-2003, Wm. Randolph Franklin - // - // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated - // documentation files (the "Software"), to deal in the Software without restriction, including without limitation - // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and - // to permit persons to whom the Software is furnished to do so, subject to the following conditions: - // - // 1. Redistributions of source code must retain the above copyright - // notice, this list of conditions and the following disclaimers. - // 2. Redistributions in binary form must reproduce the above copyright - // notice in the documentation and/or other materials provided with - // the distribution. - // 3. The name of W. 
Randolph Franklin may not be used to endorse or - // promote products derived from this Software without specific - // prior written permission. - // - // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - // CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - // IN THE SOFTWARE. - private static boolean containsOriginal(double polyLats[], double polyLons[], double latitude, double longitude) { - boolean c = false; - int i, j; - int nvert = polyLats.length; - double verty[] = polyLats; - double vertx[] = polyLons; - double testy = latitude; - double testx = longitude; - for (i = 0, j = nvert-1; i < nvert; j = i++) { - if ( ((verty[i]>testy) != (verty[j]>testy)) && - (testx < (vertx[j]-vertx[i]) * (testy-verty[i]) / (verty[j]-verty[i]) + vertx[i]) ) - c = !c; - } - return c; - } } diff --git a/lucene/core/src/test/org/apache/lucene/geo/TestPolygon2D.java b/lucene/core/src/test/org/apache/lucene/geo/TestPolygon2D.java new file mode 100644 index 00000000000..70281cac1d4 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/geo/TestPolygon2D.java @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.geo; + +import static org.apache.lucene.geo.GeoTestUtil.nextLatitude; +import static org.apache.lucene.geo.GeoTestUtil.nextLongitude; +import static org.apache.lucene.geo.GeoTestUtil.nextPolygon; + +import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.util.LuceneTestCase; + +/** Test Polygon2D impl */ +public class TestPolygon2D extends LuceneTestCase { + + /** Three boxes, an island inside a hole inside a shape */ + public void testMultiPolygon() { + Polygon hole = new Polygon(new double[] { -10, -10, 10, 10, -10 }, new double[] { -10, 10, 10, -10, -10 }); + Polygon outer = new Polygon(new double[] { -50, -50, 50, 50, -50 }, new double[] { -50, 50, 50, -50, -50 }, hole); + Polygon island = new Polygon(new double[] { -5, -5, 5, 5, -5 }, new double[] { -5, 5, 5, -5, -5 } ); + Polygon2D polygon = Polygon2D.create(outer, island); + + // contains(point) + assertTrue(polygon.contains(-2, 2)); // on the island + assertFalse(polygon.contains(-6, 6)); // in the hole + assertTrue(polygon.contains(-25, 25)); // on the mainland + assertFalse(polygon.contains(-51, 51)); // in the ocean + + // relate(box): this can conservatively return CELL_CROSSES_QUERY + assertEquals(Relation.CELL_INSIDE_QUERY, polygon.relate(-2, 2, -2, 2)); // on the island + assertEquals(Relation.CELL_OUTSIDE_QUERY, polygon.relate(6, 7, 6, 7)); // in the hole + assertEquals(Relation.CELL_INSIDE_QUERY, polygon.relate(24, 25, 24, 25)); // on the mainland + assertEquals(Relation.CELL_OUTSIDE_QUERY, polygon.relate(51, 52, 51, 52)); // in 
the ocean + assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(-60, 60, -60, 60)); // enclosing us completely + assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(49, 51, 49, 51)); // overlapping the mainland + assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(9, 11, 9, 11)); // overlapping the hole + assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(5, 6, 5, 6)); // overlapping the island + } + + public void testPacMan() throws Exception { + // pacman + double[] px = {0, 10, 10, 0, -8, -10, -8, 0, 10, 10, 0}; + double[] py = {0, 5, 9, 10, 9, 0, -9, -10, -9, -5, 0}; + + // candidate crosses cell + double xMin = 2;//-5; + double xMax = 11;//0.000001; + double yMin = -1;//0; + double yMax = 1;//5; + + // test cell crossing poly + Polygon2D polygon = Polygon2D.create(new Polygon(py, px)); + assertEquals(Relation.CELL_CROSSES_QUERY, polygon.relate(yMin, yMax, xMin, xMax)); + } + + public void testBoundingBox() throws Exception { + for (int i = 0; i < 100; i++) { + Polygon2D polygon = Polygon2D.create(nextPolygon()); + + for (int j = 0; j < 100; j++) { + double latitude = nextLatitude(); + double longitude = nextLongitude(); + // if the point is within poly, then it should be in our bounding box + if (polygon.contains(latitude, longitude)) { + assertTrue(latitude >= polygon.minLat && latitude <= polygon.maxLat); + assertTrue(longitude >= polygon.minLon && longitude <= polygon.maxLon); + } + } + } + } + + // targets the bounding box directly + public void testBoundingBoxEdgeCases() throws Exception { + for (int i = 0; i < 100; i++) { + Polygon polygon = nextPolygon(); + Polygon2D impl = Polygon2D.create(polygon); + + for (int j = 0; j < 100; j++) { + double point[] = GeoTestUtil.nextPointNear(polygon); + double latitude = point[0]; + double longitude = point[1]; + // if the point is within poly, then it should be in our bounding box + if (impl.contains(latitude, longitude)) { + assertTrue(latitude >= polygon.minLat && latitude <= 
polygon.maxLat); + assertTrue(longitude >= polygon.minLon && longitude <= polygon.maxLon); + } + } + } + } + + /** If polygon.contains(box) returns true, then any point in that box should return true as well */ + public void testContainsRandom() throws Exception { + for (int i = 0; i < 1000; i++) { + Polygon polygon = nextPolygon(); + Polygon2D impl = Polygon2D.create(polygon); + + for (int j = 0; j < 100; j++) { + Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); + // allowed to conservatively return false + if (impl.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_INSIDE_QUERY) { + for (int k = 0; k < 500; k++) { + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(rectangle); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertTrue(impl.contains(latitude, longitude)); + } + } + for (int k = 0; k < 100; k++) { + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(polygon); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertTrue(impl.contains(latitude, longitude)); + } + } + } + } + } + } + + /** If polygon.contains(box) returns true, then any point in that box should return true as well */ + // different from testContainsRandom in that its not a purely random test. we iterate the vertices of the polygon + // and generate boxes near each one of those to try to be more efficient. 
+ public void testContainsEdgeCases() throws Exception { + for (int i = 0; i < 1000; i++) { + Polygon polygon = nextPolygon(); + Polygon2D impl = Polygon2D.create(polygon); + + for (int j = 0; j < 10; j++) { + Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); + // allowed to conservatively return false + if (impl.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_INSIDE_QUERY) { + for (int k = 0; k < 100; k++) { + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(rectangle); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertTrue(impl.contains(latitude, longitude)); + } + } + for (int k = 0; k < 20; k++) { + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(polygon); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertTrue(impl.contains(latitude, longitude)); + } + } + } + } + } + } + + /** If polygon.intersects(box) returns false, then any point in that box should return false as well */ + public void testIntersectRandom() { + for (int i = 0; i < 100; i++) { + Polygon polygon = nextPolygon(); + Polygon2D impl = Polygon2D.create(polygon); + + for (int j = 0; j < 100; j++) { + Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); + // allowed to conservatively return true. 
+ if (impl.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_OUTSIDE_QUERY) { + for (int k = 0; k < 1000; k++) { + double point[] = GeoTestUtil.nextPointNear(rectangle); + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertFalse(impl.contains(latitude, longitude)); + } + } + for (int k = 0; k < 100; k++) { + double point[] = GeoTestUtil.nextPointNear(polygon); + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertFalse(impl.contains(latitude, longitude)); + } + } + } + } + } + } + + /** If polygon.intersects(box) returns false, then any point in that box should return false as well */ + // different from testIntersectsRandom in that its not a purely random test. we iterate the vertices of the polygon + // and generate boxes near each one of those to try to be more efficient. + public void testIntersectEdgeCases() { + for (int i = 0; i < 100; i++) { + Polygon polygon = nextPolygon(); + Polygon2D impl = Polygon2D.create(polygon); + + for (int j = 0; j < 10; j++) { + Rectangle rectangle = GeoTestUtil.nextBoxNear(polygon); + // allowed to conservatively return false. + if (impl.relate(rectangle.minLat, rectangle.maxLat, rectangle.minLon, rectangle.maxLon) == Relation.CELL_OUTSIDE_QUERY) { + for (int k = 0; k < 100; k++) { + // this tests in our range but sometimes outside! 
so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(rectangle); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertFalse(impl.contains(latitude, longitude)); + } + } + for (int k = 0; k < 50; k++) { + // this tests in our range but sometimes outside! so we have to double-check its really in other box + double point[] = GeoTestUtil.nextPointNear(polygon); + double latitude = point[0]; + double longitude = point[1]; + // check for sure its in our box + if (latitude >= rectangle.minLat && latitude <= rectangle.maxLat && longitude >= rectangle.minLon && longitude <= rectangle.maxLon) { + assertFalse(impl.contains(latitude, longitude)); + } + } + } + } + } + } + + /** Tests edge case behavior with respect to insideness */ + public void testEdgeInsideness() { + Polygon2D poly = Polygon2D.create(new Polygon(new double[] { -2, -2, 2, 2, -2 }, new double[] { -2, 2, 2, -2, -2 })); + assertTrue(poly.contains(-2, -2)); // bottom left corner: true + assertFalse(poly.contains(-2, 2)); // bottom right corner: false + assertFalse(poly.contains(2, -2)); // top left corner: false + assertFalse(poly.contains(2, 2)); // top right corner: false + assertTrue(poly.contains(-2, -1)); // bottom side: true + assertTrue(poly.contains(-2, 0)); // bottom side: true + assertTrue(poly.contains(-2, 1)); // bottom side: true + assertFalse(poly.contains(2, -1)); // top side: false + assertFalse(poly.contains(2, 0)); // top side: false + assertFalse(poly.contains(2, 1)); // top side: false + assertFalse(poly.contains(-1, 2)); // right side: false + assertFalse(poly.contains(0, 2)); // right side: false + assertFalse(poly.contains(1, 2)); // right side: false + assertTrue(poly.contains(-1, -2)); // left side: true + assertTrue(poly.contains(0, -2)); // left side: true + 
assertTrue(poly.contains(1, -2)); // left side: true + } + + /** Tests current impl against original algorithm */ + public void testContainsAgainstOriginal() { + for (int i = 0; i < 1000; i++) { + Polygon polygon = nextPolygon(); + // currently we don't generate these, but this test does not want holes. + while (polygon.getHoles().length > 0) { + polygon = nextPolygon(); + } + Polygon2D impl = Polygon2D.create(polygon); + + // random lat/lons against polygon + for (int j = 0; j < 1000; j++) { + double point[] = GeoTestUtil.nextPointNear(polygon); + double latitude = point[0]; + double longitude = point[1]; + boolean expected = GeoTestUtil.containsSlowly(polygon, latitude, longitude); + assertEquals(expected, impl.contains(latitude, longitude)); + } + } + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java deleted file mode 100644 index 4b3b2b2acb9..00000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonGrid.java +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.document; - -import org.apache.lucene.geo.Polygon; -import org.apache.lucene.index.PointValues.Relation; -import org.apache.lucene.util.FixedBitSet; - -import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; -import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; - -/** - * This is a temporary hack, until some polygon methods have better performance! - *

- * When this file is removed then we have made good progress! In general we don't call - * the point-in-polygon algorithm that much, because of how BKD divides up the data. But - * today the method is very slow (general to all polygons, linear with the number of vertices). - * At the same time polygon-rectangle relation operations are also slow in the same way, this - * just really ensures they are the bottleneck by removing most of the point-in-polygon calls. - *

- * See the "grid" algorithm description here: http://erich.realtimerendering.com/ptinpoly/ - * A few differences: - *

    - *
  • We work in an integer encoding, so edge cases are simpler. - *
  • We classify each grid cell as "contained", "not contained", or "don't know". - *
  • We form a grid over a potentially complex multipolygon with holes. - *
  • Construction is less efficient because we do not do anything "smart" such - * as following polygon edges. - *
  • Instead we construct a baby tree to reduce the number of relation operations, - * which are currently expensive. - *
- */ -// TODO: just make a more proper tree (maybe in-ram BKD)? then we can answer most -// relational operations as rectangle <-> rectangle relations in integer space in log(n) time.. -final class LatLonGrid { - // must be a power of two! - static final int GRID_SIZE = 1<<7; - final int minLat; - final int maxLat; - final int minLon; - final int maxLon; - // TODO: something more efficient than parallel bitsets? maybe one bitset? - final FixedBitSet haveAnswer = new FixedBitSet(GRID_SIZE * GRID_SIZE); - final FixedBitSet answer = new FixedBitSet(GRID_SIZE * GRID_SIZE); - - final long latPerCell; - final long lonPerCell; - - final LatLonTree[] tree; - - LatLonGrid(int minLat, int maxLat, int minLon, int maxLon, LatLonTree[] tree) { - this.minLat = minLat; - this.maxLat = maxLat; - this.minLon = minLon; - this.maxLon = maxLon; - this.tree = tree; - if (minLon > maxLon) { - // maybe make 2 grids if you want this? - throw new IllegalArgumentException("Grid cannot cross the dateline"); - } - if (minLat > maxLat) { - throw new IllegalArgumentException("bogus grid"); - } - long latitudeRange = maxLat - (long) minLat; - long longitudeRange = maxLon - (long) minLon; - - // if the range is too small, we can't divide it up in our grid nicely. - // in this case of a tiny polygon, we just make an empty grid instead of complicating/slowing down code. - final long minRange = (GRID_SIZE - 1) * (GRID_SIZE - 1); - if (latitudeRange < minRange || longitudeRange < minRange) { - latPerCell = lonPerCell = Long.MAX_VALUE; - } else { - // we spill over the edge of the bounding box in each direction a bit, - // but it prevents edge case bugs. - latPerCell = latitudeRange / (GRID_SIZE - 1); - lonPerCell = longitudeRange / (GRID_SIZE - 1); - fill(0, GRID_SIZE, 0, GRID_SIZE); - } - } - - /** fills a 2D range of grid cells [minLatIndex .. maxLatIndex) X [minLonIndex .. 
maxLonIndex) */ - void fill(int minLatIndex, int maxLatIndex, int minLonIndex, int maxLonIndex) { - // grid cells at the edge of the bounding box are typically smaller than normal, because we spill over. - long cellMinLat = minLat + (minLatIndex * latPerCell); - long cellMaxLat = Math.min(maxLat, minLat + (maxLatIndex * latPerCell) - 1); - long cellMinLon = minLon + (minLonIndex * lonPerCell); - long cellMaxLon = Math.min(maxLon, minLon + (maxLonIndex * lonPerCell) - 1); - - assert cellMinLat <= maxLat && cellMinLon <= maxLon; - assert cellMaxLat >= cellMinLat; - assert cellMaxLon >= cellMinLon; - - Relation relation = LatLonTree.relate(tree, decodeLatitude((int) cellMinLat), - decodeLatitude((int) cellMaxLat), - decodeLongitude((int) cellMinLon), - decodeLongitude((int) cellMaxLon)); - if (relation != Relation.CELL_CROSSES_QUERY) { - // we know the answer for this region, fill the cell range - for (int i = minLatIndex; i < maxLatIndex; i++) { - for (int j = minLonIndex; j < maxLonIndex; j++) { - int index = i * GRID_SIZE + j; - assert haveAnswer.get(index) == false; - haveAnswer.set(index); - if (relation == Relation.CELL_INSIDE_QUERY) { - answer.set(index); - } - } - } - } else if (minLatIndex == maxLatIndex - 1) { - // nothing more to do: this is a single grid cell (leaf node) and - // is an edge case for the polygon. - } else { - // grid range crosses our polygon, keep recursing. 
- int midLatIndex = (minLatIndex + maxLatIndex) >>> 1; - int midLonIndex = (minLonIndex + maxLonIndex) >>> 1; - fill(minLatIndex, midLatIndex, minLonIndex, midLonIndex); - fill(minLatIndex, midLatIndex, midLonIndex, maxLonIndex); - fill(midLatIndex, maxLatIndex, minLonIndex, midLonIndex); - fill(midLatIndex, maxLatIndex, midLonIndex, maxLonIndex); - } - } - - /** Returns true if inside one of our polygons, false otherwise */ - boolean contains(int latitude, int longitude) { - // first see if the grid knows the answer - int index = index(latitude, longitude); - if (index == -1) { - return false; // outside of bounding box range - } else if (haveAnswer.get(index)) { - return answer.get(index); - } - - // the grid is unsure (boundary): do a real test. - double docLatitude = decodeLatitude(latitude); - double docLongitude = decodeLongitude(longitude); - return LatLonTree.contains(tree, docLatitude, docLongitude); - } - - /** Returns grid index of lat/lon, or -1 if the value is outside of the bounding box. 
*/ - private int index(int latitude, int longitude) { - if (latitude < minLat || latitude > maxLat || longitude < minLon || longitude > maxLon) { - return -1; // outside of bounding box range - } - - long latRel = latitude - (long) minLat; - long lonRel = longitude - (long) minLon; - - int latIndex = (int) (latRel / latPerCell); - assert latIndex < GRID_SIZE; - int lonIndex = (int) (lonRel / lonPerCell); - assert lonIndex < GRID_SIZE; - return latIndex * GRID_SIZE + lonIndex; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index e68cb4579c6..ee7c1e8c157 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -38,6 +38,7 @@ import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.StringHelper; import org.apache.lucene.geo.Polygon; +import org.apache.lucene.geo.Polygon2D; import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; @@ -92,11 +93,7 @@ final class LatLonPointInPolygonQuery extends Query { NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0); NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); - final LatLonTree[] tree = LatLonTree.build(polygons); - final LatLonGrid grid = new LatLonGrid(encodeLatitude(box.minLat), - encodeLatitude(box.maxLat), - encodeLongitude(box.minLon), - encodeLongitude(box.maxLon), tree); + final Polygon2D tree = Polygon2D.create(polygons); return new ConstantScoreWeight(this) { @@ -132,17 +129,8 @@ final class LatLonPointInPolygonQuery extends Query { @Override public void visit(int docID, byte[] packedValue) { - // we bounds check individual values, as subtrees may cross, but we are 
being sent the values anyway: - // this reduces the amount of docvalues fetches (improves approximation) - if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 || - StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0 || - StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, maxLon, 0) > 0 || - StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon, 0) < 0) { - // outside of global bounding box range - return; - } - if (grid.contains(NumericUtils.sortableBytesToInt(packedValue, 0), - NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) { + if (tree.contains(decodeLatitude(packedValue, 0), + decodeLongitude(packedValue, Integer.BYTES))) { result.add(docID); } } @@ -162,7 +150,7 @@ final class LatLonPointInPolygonQuery extends Query { double cellMaxLat = decodeLatitude(maxPackedValue, 0); double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES); - return LatLonTree.relate(tree, cellMinLat, cellMaxLat, cellMinLon, cellMaxLon); + return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon); } }); diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java deleted file mode 100644 index 0c185ea366d..00000000000 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonGrid.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.document; - -import org.apache.lucene.geo.GeoTestUtil; -import org.apache.lucene.geo.GeoUtils; -import org.apache.lucene.geo.Polygon; -import org.apache.lucene.geo.Rectangle; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.TestUtil; - -import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; -import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; -import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; -import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; - -/** tests against LatLonGrid (avoiding indexing/queries) */ -public class TestLatLonGrid extends LuceneTestCase { - - /** If the grid returns true, then any point in that cell should return true as well */ - public void testRandom() throws Exception { - for (int i = 0; i < 1000; i++) { - Polygon polygon = GeoTestUtil.nextPolygon(); - Rectangle box = Rectangle.fromPolygon(new Polygon[] { polygon }); - int minLat = encodeLatitude(box.minLat); - int maxLat = encodeLatitude(box.maxLat); - int minLon = encodeLongitude(box.minLon); - int maxLon = encodeLongitude(box.maxLon); - LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, LatLonTree.build(polygon)); - // we are in integer space... but exhaustive testing is slow! 
- for (int j = 0; j < 10000; j++) { - int lat = TestUtil.nextInt(random(), minLat, maxLat); - int lon = TestUtil.nextInt(random(), minLon, maxLon); - - boolean expected = polygon.contains(decodeLatitude(lat), - decodeLongitude(lon)); - boolean actual = grid.contains(lat, lon); - assertEquals(expected, actual); - } - } - } - - public void testGrowingPolygon() { - double centerLat = -80.0 + random().nextDouble() * 160.0; - double centerLon = -170.0 + random().nextDouble() * 340.0; - double radiusMeters = 0.0; - for(int i=0;i<10;i++) { - radiusMeters = Math.nextUp(radiusMeters); - } - - // Start with a miniscule polygon, and grow it: - int gons = TestUtil.nextInt(random(), 4, 10); - while (radiusMeters < GeoUtils.EARTH_MEAN_RADIUS_METERS * Math.PI / 2.0 + 1.0) { - Polygon polygon; - try { - polygon = GeoTestUtil.createRegularPolygon(centerLat, centerLon, radiusMeters, gons); - } catch (IllegalArgumentException iae) { - // OK: we made a too-big poly and it crossed a pole or dateline - break; - } - radiusMeters *= 1.1; - - Rectangle box = Rectangle.fromPolygon(new Polygon[] { polygon }); - int minLat = encodeLatitude(box.minLat); - int maxLat = encodeLatitude(box.maxLat); - int minLon = encodeLongitude(box.minLon); - int maxLon = encodeLongitude(box.maxLon); - LatLonGrid grid = new LatLonGrid(minLat, maxLat, minLon, maxLon, LatLonTree.build(polygon)); - // we are in integer space... but exhaustive testing is slow! 
- for (int j = 0; j < 1000; j++) { - int lat = TestUtil.nextInt(random(), minLat, maxLat); - int lon = TestUtil.nextInt(random(), minLon, maxLon); - - boolean expected = polygon.contains(decodeLatitude(lat), - decodeLongitude(lon)); - boolean actual = grid.contains(lat, lon); - assertEquals(expected, actual); - } - } - } - - /** create ever-increasing grids and check that too-small polygons don't blow it up */ - public void testTinyGrids() { - double ZERO = decodeLatitude(0); - double ONE = decodeLatitude(1); - Polygon tiny = new Polygon(new double[] { ZERO, ZERO, ONE, ONE, ZERO }, new double[] { ZERO, ONE, ONE, ZERO, ZERO }); - for (int max = 1; max < 500000; max++) { - LatLonGrid grid = new LatLonGrid(0, max, 0, max, LatLonTree.build(tiny)); - assertEquals(tiny.contains(decodeLatitude(max), decodeLongitude(max)), grid.contains(max, max)); - } - } -} diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonTree.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonTree.java deleted file mode 100644 index c939026671e..00000000000 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonTree.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.document; - -import org.apache.lucene.geo.GeoTestUtil; -import org.apache.lucene.geo.Polygon; -import org.apache.lucene.geo.Rectangle; -import org.apache.lucene.index.PointValues.Relation; -import org.apache.lucene.util.LuceneTestCase; - -/** Test LatLonTree against the slower implementation for now */ -public class TestLatLonTree extends LuceneTestCase { - - /** test that contains() works the same as brute force */ - public void testContainsRandom() { - for (int i = 0; i < 1000; i++) { - Polygon polygon = GeoTestUtil.nextPolygon(); - LatLonTree tree = new LatLonTree(polygon); - for (int j = 0; j < 1000; j++) { - double point[] = GeoTestUtil.nextPointNear(polygon); - boolean expected = polygon.contains(point[0], point[1]); - assertEquals(expected, tree.contains(point[0], point[1])); - } - } - } - - /** test that relate() works the same as brute force */ - public void testRelateRandom() { - for (int i = 0; i < 1000; i++) { - Polygon polygon = GeoTestUtil.nextPolygon(); - LatLonTree tree = new LatLonTree(polygon); - for (int j = 0; j < 1000; j++) { - Rectangle box = GeoTestUtil.nextBoxNear(polygon); - Relation expected = polygon.relate(box.minLat, box.maxLat, box.minLon, box.maxLon); - assertEquals(expected, tree.relate(box.minLat, box.maxLat, box.minLon, box.maxLon)); - } - } - } -} diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java index 14b7cc7fa2b..047bf27292b 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointInPolygonQueryImpl.java @@ -20,7 +20,7 @@ import java.util.Objects; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.spatial.geopoint.document.GeoPointField.TermEncoding; -import 
org.apache.lucene.geo.Polygon; +import org.apache.lucene.geo.Polygon2D; import org.apache.lucene.index.PointValues.Relation; /** Package private implementation for the public facing GeoPointInPolygonQuery delegate class. @@ -29,13 +29,13 @@ import org.apache.lucene.index.PointValues.Relation; */ final class GeoPointInPolygonQueryImpl extends GeoPointInBBoxQueryImpl { private final GeoPointInPolygonQuery polygonQuery; - private final Polygon[] polygons; + private final Polygon2D polygons; GeoPointInPolygonQueryImpl(final String field, final TermEncoding termEncoding, final GeoPointInPolygonQuery q, final double minLat, final double maxLat, final double minLon, final double maxLon) { super(field, termEncoding, minLat, maxLat, minLon, maxLon); this.polygonQuery = Objects.requireNonNull(q); - this.polygons = Objects.requireNonNull(q.polygons); + this.polygons = Polygon2D.create(q.polygons); } @Override @@ -59,17 +59,17 @@ final class GeoPointInPolygonQueryImpl extends GeoPointInBBoxQueryImpl { @Override protected boolean cellCrosses(final double minLat, final double maxLat, final double minLon, final double maxLon) { - return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) == Relation.CELL_CROSSES_QUERY; + return polygons.relate(minLat, maxLat, minLon, maxLon) == Relation.CELL_CROSSES_QUERY; } @Override protected boolean cellWithin(final double minLat, final double maxLat, final double minLon, final double maxLon) { - return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) == Relation.CELL_INSIDE_QUERY; + return polygons.relate(minLat, maxLat, minLon, maxLon) == Relation.CELL_INSIDE_QUERY; } @Override protected boolean cellIntersectsShape(final double minLat, final double maxLat, final double minLon, final double maxLon) { - return Polygon.relate(polygons, minLat, maxLat, minLon, maxLon) != Relation.CELL_OUTSIDE_QUERY; + return polygons.relate(minLat, maxLat, minLon, maxLon) != Relation.CELL_OUTSIDE_QUERY; } /** @@ -77,11 +77,11 @@ final class 
GeoPointInPolygonQueryImpl extends GeoPointInBBoxQueryImpl { * {@link org.apache.lucene.spatial.geopoint.search.GeoPointTermsEnum#accept} method is called to match * encoded terms that fall within the bounding box of the polygon. Those documents that pass the initial * bounding box filter are then compared to the provided polygon using the - * {@link Polygon#contains(Polygon[], double, double)} method. + * {@link Polygon2D#contains(double, double)} method. */ @Override protected boolean postFilter(final double lat, final double lon) { - return Polygon.contains(polygons, lat, lon); + return polygons.contains(lat, lon); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java index 6bc1e6e061e..bda4cdebaad 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java @@ -1112,7 +1112,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase { } else if (Double.isNaN(lats[id])) { expected = false; } else { - expected = polygon.contains(lats[id], lons[id]); + expected = GeoTestUtil.containsSlowly(polygon, lats[id], lons[id]); } if (hits.get(docID) != expected) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/GeoTestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/geo/GeoTestUtil.java index 98e296684ac..62b824fd31e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/geo/GeoTestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/geo/GeoTestUtil.java @@ -642,4 +642,58 @@ public class GeoTestUtil { sb.append("\n"); return sb.toString(); } + + /** + * Simple slow point in polygon check (for testing) + */ + // direct port of PNPOLY C code (https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html) + // this allows us to improve the code yet still ensure we have its 
properties + // it is under the BSD license (https://www.ecse.rpi.edu/~wrf/Research/Short_Notes/pnpoly.html#License%20to%20Use) + // + // Copyright (c) 1970-2003, Wm. Randolph Franklin + // + // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + // documentation files (the "Software"), to deal in the Software without restriction, including without limitation + // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and + // to permit persons to whom the Software is furnished to do so, subject to the following conditions: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimers. + // 2. Redistributions in binary form must reproduce the above copyright + // notice in the documentation and/or other materials provided with + // the distribution. + // 3. The name of W. Randolph Franklin may not be used to endorse or + // promote products derived from this Software without specific + // prior written permission. + // + // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + // CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + // IN THE SOFTWARE. 
+ public static boolean containsSlowly(Polygon polygon, double latitude, double longitude) { + if (polygon.getHoles().length > 0) { + throw new UnsupportedOperationException("this testing method does not support holes"); + } + double polyLats[] = polygon.getPolyLats(); + double polyLons[] = polygon.getPolyLons(); + // bounding box check required due to rounding errors (we don't solve that problem) + if (latitude < polygon.minLat || latitude > polygon.maxLat || longitude < polygon.minLon || longitude > polygon.maxLon) { + return false; + } + + boolean c = false; + int i, j; + int nvert = polyLats.length; + double verty[] = polyLats; + double vertx[] = polyLons; + double testy = latitude; + double testx = longitude; + for (i = 0, j = nvert-1; i < nvert; j = i++) { + if ( ((verty[i]>testy) != (verty[j]>testy)) && + (testx < (vertx[j]-vertx[i]) * (testy-verty[i]) / (verty[j]-verty[i]) + vertx[i]) ) + c = !c; + } + return c; + } } From 922265b478296992189434040517368cf93d1b09 Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Tue, 26 Apr 2016 01:50:26 +0530 Subject: [PATCH 16/18] SOLR-9014: Deprecate and reduce usage of ClusterState methods which may make calls to ZK via the lazy collection reference --- solr/CHANGES.txt | 3 ++ .../java/org/apache/solr/cloud/Assign.java | 8 ++-- .../OverseerAutoReplicaFailoverThread.java | 1 + .../OverseerCollectionMessageHandler.java | 40 ++++++++-------- .../OverseerConfigSetMessageHandler.java | 1 + .../cloud/overseer/ClusterStateMutator.java | 16 +++---- .../cloud/overseer/CollectionMutator.java | 6 +-- .../solr/cloud/overseer/NodeMutator.java | 9 ++-- .../solr/cloud/overseer/ReplicaMutator.java | 36 +++++++------- .../solr/cloud/overseer/SliceMutator.java | 39 ++++++++------- .../solr/cloud/overseer/ZkStateWriter.java | 3 +- .../java/org/apache/solr/core/SolrCore.java | 5 +- .../handler/CdcrUpdateLogSynchronizer.java | 4 +- .../org/apache/solr/handler/SQLHandler.java | 1 + .../handler/admin/CollectionsHandler.java | 27 
+++++------ .../handler/admin/CoreAdminOperation.java | 11 +++-- .../org/apache/solr/servlet/HttpSolrCall.java | 11 +++-- .../processor/DistributedUpdateProcessor.java | 9 ++-- .../java/org/apache/solr/util/SolrCLI.java | 1 + .../org/apache/solr/util/SolrLogLayout.java | 10 ++-- .../org/apache/solr/cloud/AssignTest.java | 2 +- .../CollectionsAPIDistributedZkTest.java | 8 ++-- .../solr/cloud/CollectionsAPISolrJTest.java | 2 +- .../org/apache/solr/cloud/OverseerTest.java | 3 +- .../apache/solr/handler/TestSQLHandler.java | 1 + .../client/solrj/io/stream/TopicStream.java | 1 + .../solr/common/cloud/ClusterState.java | 48 ++++++++++++++----- .../solr/common/cloud/ClusterStateUtil.java | 1 + .../solr/common/cloud/DocCollection.java | 6 +++ .../solr/common/cloud/ZkStateReader.java | 3 +- .../solr/client/solrj/io/sql/JdbcTest.java | 1 + 31 files changed, 186 insertions(+), 131 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 6034851e949..999bd737fd9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -169,6 +169,9 @@ Optimizations * SOLR-8973: Zookeeper frenzy when a core is first created. (Janmejay Singh, Scott Blum, shalin) +* SOLR-9014: Deprecate and reduce usage of ClusterState methods which may make calls to ZK via + the lazy collection reference. (Scott Blum, shalin) + Other Changes ---------------------- * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy. 
diff --git a/solr/core/src/java/org/apache/solr/cloud/Assign.java b/solr/core/src/java/org/apache/solr/cloud/Assign.java index 693927085b5..92310ed5f05 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Assign.java +++ b/solr/core/src/java/org/apache/solr/cloud/Assign.java @@ -46,8 +46,8 @@ import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE; public class Assign { private static Pattern COUNT = Pattern.compile("core_node(\\d+)"); - public static String assignNode(String collection, ClusterState state) { - Map sliceMap = state.getSlicesMap(collection); + public static String assignNode(DocCollection collection) { + Map sliceMap = collection != null ? collection.getSlicesMap() : null; if (sliceMap == null) { return "core_node1"; } @@ -70,12 +70,12 @@ public class Assign { * * @return the assigned shard id */ - public static String assignShard(String collection, ClusterState state, Integer numShards) { + public static String assignShard(DocCollection collection, Integer numShards) { if (numShards == null) { numShards = 1; } String returnShardId = null; - Map sliceMap = state.getActiveSlicesMap(collection); + Map sliceMap = collection != null ? collection.getActiveSlicesMap() : null; // TODO: now that we create shards ahead of time, is this code needed? Esp since hash ranges aren't assigned when creating via this method? 
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java index f94ffcc73ba..e8ac6c5ddf6 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java index 6bff6481fd4..ce0484100d0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java @@ -945,16 +945,13 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler + router.getClass().getName()); } } else { - parentSlice = clusterState.getSlice(collectionName, slice); + parentSlice = collection.getSlice(slice); } if (parentSlice == null) { - if (clusterState.hasCollection(collectionName)) { - throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice); - } else { - throw new SolrException(ErrorCode.BAD_REQUEST, - "No collection with the specified name exists: " + collectionName); - } + // no chance of the collection being null because ClusterState#getCollection(String) would have thrown + // an exception already + throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice); } // find the leader for the shard @@ -1039,7 +1036,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler String subShardName = collectionName + "_" + subSlice + "_replica1"; 
subShardNames.add(subShardName); - Slice oSlice = clusterState.getSlice(collectionName, subSlice); + Slice oSlice = collection.getSlice(subSlice); if (oSlice != null) { final Slice.State state = oSlice.getState(); if (state == Slice.State.ACTIVE) { @@ -1180,7 +1177,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler // TODO: Have replication factor decided in some other way instead of numShards for the parent - int repFactor = clusterState.getSlice(collectionName, slice).getReplicas().size(); + int repFactor = parentSlice.getReplicas().size(); // we need to look at every node and see how many cores it serves // add our new cores to existing nodes serving the least number of cores @@ -1379,18 +1376,18 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler } private void deleteShard(ClusterState clusterState, ZkNodeProps message, NamedList results) { - String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP); log.info("Delete shard invoked"); - Slice slice = clusterState.getSlice(collection, sliceId); + Slice slice = clusterState.getSlice(collectionName, sliceId); if (slice == null) { - if (clusterState.hasCollection(collection)) { + if (clusterState.hasCollection(collectionName)) { throw new SolrException(ErrorCode.BAD_REQUEST, - "No shard with name " + sliceId + " exists for collection " + collection); + "No shard with name " + sliceId + " exists for collection " + collectionName); } else { - throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collection); + throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName); } } // For now, only allow for deletions of Inactive slices or custom hashes (range==null). 
@@ -1421,7 +1418,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler processResponses(results, shardHandler, true, "Failed to delete shard", asyncId, requestMap, Collections.emptySet()); ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, - collection, ZkStateReader.SHARD_ID_PROP, sliceId); + collectionName, ZkStateReader.SHARD_ID_PROP, sliceId); Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); // wait for a while until we don't see the shard @@ -1429,7 +1426,8 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler boolean removed = false; while (! timeout.hasTimedOut()) { Thread.sleep(100); - removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null; + DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName); + removed = collection.getSlice(sliceId) == null; if (removed) { Thread.sleep(100); // just a bit of time so it's more likely other readers see on return break; @@ -1437,16 +1435,16 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler } if (!removed) { throw new SolrException(ErrorCode.SERVER_ERROR, - "Could not fully remove collection: " + collection + " shard: " + sliceId); + "Could not fully remove collection: " + collectionName + " shard: " + sliceId); } - log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId); + log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId); } catch (SolrException e) { throw e; } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, - "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e); + "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e); } } @@ -1561,7 +1559,9 @@ public class OverseerCollectionMessageHandler implements 
OverseerMessageHandler boolean added = false; while (! waitUntil.hasTimedOut()) { Thread.sleep(100); - Map rules = zkStateReader.getClusterState().getSlice(sourceCollection.getName(), sourceSlice.getName()).getRoutingRules(); + sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName()); + sourceSlice = sourceCollection.getSlice(sourceSlice.getName()); + Map rules = sourceSlice.getRoutingRules(); if (rules != null) { RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!"); if (rule != null && rule.getRouteRanges().contains(splitRange)) { diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java index 1f972ffde84..15fed42182f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerConfigSetMessageHandler.java @@ -30,6 +30,7 @@ import java.util.Set; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkConfigManager; import org.apache.solr.common.cloud.ZkNodeProps; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java index 7ffa8c1d556..0a76d91528e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java @@ -122,7 +122,7 @@ public class ClusterStateMutator { public static ClusterState newState(ClusterState state, String name, DocCollection collection) { ClusterState newClusterState = null; if (collection == null) { - newClusterState = state.copyWith(name, 
(DocCollection) null); + newClusterState = state.copyWith(name, null); } else { newClusterState = state.copyWith(name, collection); } @@ -153,9 +153,8 @@ public class ClusterStateMutator { /* * Return an already assigned id or null if not assigned */ - public static String getAssignedId(final ClusterState state, final String nodeName, - final ZkNodeProps coreState) { - Collection slices = state.getSlices(coreState.getStr(ZkStateReader.COLLECTION_PROP)); + public static String getAssignedId(final DocCollection collection, final String nodeName) { + Collection slices = collection != null ? collection.getSlices() : null; if (slices != null) { for (Slice slice : slices) { if (slice.getReplicasMap().get(nodeName) != null) { @@ -166,18 +165,15 @@ public class ClusterStateMutator { return null; } - public static String getAssignedCoreNodeName(ClusterState state, ZkNodeProps message) { - Collection slices = state.getSlices(message.getStr(ZkStateReader.COLLECTION_PROP)); + public static String getAssignedCoreNodeName(DocCollection collection, String forNodeName, String forCoreName) { + Collection slices = collection != null ? 
collection.getSlices() : null; if (slices != null) { for (Slice slice : slices) { for (Replica replica : slice.getReplicas()) { String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP); String core = replica.getStr(ZkStateReader.CORE_NAME_PROP); - String msgNodeName = message.getStr(ZkStateReader.NODE_NAME_PROP); - String msgCore = message.getStr(ZkStateReader.CORE_NAME_PROP); - - if (nodeName.equals(msgNodeName) && core.equals(msgCore)) { + if (nodeName.equals(forNodeName) && core.equals(forCoreName)) { return replica.getName(); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java index 4f7cb52070d..3d950fe241b 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java @@ -51,7 +51,8 @@ public class CollectionMutator { String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); if (!checkCollectionKeyExistence(message)) return ZkStateWriter.NO_OP; String shardId = message.getStr(ZkStateReader.SHARD_ID_PROP); - Slice slice = clusterState.getSlice(collectionName, shardId); + DocCollection collection = clusterState.getCollection(collectionName); + Slice slice = collection.getSlice(shardId); if (slice == null) { Map replicas = Collections.EMPTY_MAP; Map sliceProps = new HashMap<>(); @@ -63,8 +64,7 @@ public class CollectionMutator { if (shardParent != null) { sliceProps.put(Slice.PARENT, shardParent); } - DocCollection collection = updateSlice(collectionName, - clusterState.getCollection(collectionName), new Slice(shardId, replicas, sliceProps)); + collection = updateSlice(collectionName, collection, new Slice(shardId, replicas, sliceProps)); return new ZkWriteCommand(collectionName, collection); } else { log.error("Unable to create Shard: " + shardId + " because it already exists in collection: " + collectionName); diff --git 
a/solr/core/src/java/org/apache/solr/cloud/overseer/NodeMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/NodeMutator.java index 0784cd4e2f1..5dd27c1735d 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/NodeMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/NodeMutator.java @@ -27,6 +27,7 @@ import java.util.Set; import java.util.Map.Entry; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkNodeProps; @@ -50,12 +51,12 @@ public class NodeMutator { Set collections = clusterState.getCollections(); for (String collection : collections) { - - Map slicesCopy = new LinkedHashMap<>(clusterState.getSlicesMap(collection)); + DocCollection docCollection = clusterState.getCollection(collection); + Map slicesCopy = new LinkedHashMap<>(docCollection.getSlicesMap()); Set> entries = slicesCopy.entrySet(); for (Entry entry : entries) { - Slice slice = clusterState.getSlice(collection, entry.getKey()); + Slice slice = docCollection.getSlice(entry.getKey()); Map newReplicas = new HashMap(); Collection replicas = slice.getReplicas(); @@ -76,7 +77,7 @@ public class NodeMutator { } - zkWriteCommands.add(new ZkWriteCommand(collection, clusterState.getCollection(collection).copyWithSlices(slicesCopy))); + zkWriteCommands.add(new ZkWriteCommand(collection, docCollection.copyWithSlices(slicesCopy))); } return zkWriteCommands; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java index a3efc8099fc..5147f43797b 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java @@ -127,7 +127,8 @@ public class ReplicaMutator { isUnique = Boolean.parseBoolean(shardUnique); } - Replica 
replica = clusterState.getReplica(collectionName, replicaName); + DocCollection collection = clusterState.getCollection(collectionName); + Replica replica = collection.getReplica(replicaName); if (replica == null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Could not find collection/slice/replica " + @@ -138,7 +139,7 @@ public class ReplicaMutator { if (StringUtils.equalsIgnoreCase(replica.getStr(property), propVal)) return ZkStateWriter.NO_OP; // already the value we're going to set // OK, there's no way we won't change the cluster state now - Map replicas = clusterState.getSlice(collectionName, sliceName).getReplicasCopy(); + Map replicas = collection.getSlice(sliceName).getReplicasCopy(); if (isUnique == false) { replicas.get(replicaName).getProperties().put(property, propVal); } else { // Set prop for this replica, but remove it for all others. @@ -150,8 +151,8 @@ public class ReplicaMutator { } } } - Slice newSlice = new Slice(sliceName, replicas, clusterState.getSlice(collectionName, sliceName).shallowCopy()); - DocCollection newCollection = CollectionMutator.updateSlice(collectionName, clusterState.getCollection(collectionName), + Slice newSlice = new Slice(sliceName, replicas, collection.getSlice(sliceName).shallowCopy()); + DocCollection newCollection = CollectionMutator.updateSlice(collectionName, collection, newSlice); return new ZkWriteCommand(collectionName, newCollection); } @@ -174,7 +175,8 @@ public class ReplicaMutator { property = OverseerCollectionMessageHandler.COLL_PROP_PREFIX + property; } - Replica replica = clusterState.getReplica(collectionName, replicaName); + DocCollection collection = clusterState.getCollection(collectionName); + Replica replica = collection.getReplica(replicaName); if (replica == null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Could not find collection/slice/replica " + @@ -188,7 +190,6 @@ public class ReplicaMutator { log.info("Deleting property " + property + " for collection: 
" + collectionName + " slice " + sliceName + " replica " + replicaName + ". Full message: " + message); - DocCollection collection = clusterState.getCollection(collectionName); Slice slice = collection.getSlice(sliceName); DocCollection newCollection = SliceMutator.updateReplica(collection, slice, replicaName, unsetProperty(replica, property)); @@ -232,13 +233,15 @@ public class ReplicaMutator { String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); String coreNodeName = message.getStr(ZkStateReader.CORE_NODE_NAME_PROP); + DocCollection collection = prevState.getCollectionOrNull(collectionName); if (coreNodeName == null) { - coreNodeName = ClusterStateMutator.getAssignedCoreNodeName(prevState, message); + coreNodeName = ClusterStateMutator.getAssignedCoreNodeName(collection, + message.getStr(ZkStateReader.NODE_NAME_PROP), message.getStr(ZkStateReader.CORE_NAME_PROP)); if (coreNodeName != null) { log.info("node=" + coreNodeName + " is already registered"); } else { // if coreNodeName is null, auto assign one - coreNodeName = Assign.assignNode(collectionName, prevState); + coreNodeName = Assign.assignNode(collection); } message.getProperties().put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); @@ -247,7 +250,7 @@ public class ReplicaMutator { // use the provided non null shardId if (sliceName == null) { //get shardId from ClusterState - sliceName = ClusterStateMutator.getAssignedId(prevState, coreNodeName, message); + sliceName = ClusterStateMutator.getAssignedId(collection, coreNodeName); if (sliceName != null) { log.info("shard=" + sliceName + " is already registered"); } @@ -256,14 +259,14 @@ public class ReplicaMutator { //request new shardId if (collectionExists) { // use existing numShards - numShards = prevState.getCollection(collectionName).getSlices().size(); + numShards = collection.getSlices().size(); log.info("Collection already exists with " + ZkStateReader.NUM_SHARDS_PROP + "=" + numShards); } - sliceName = Assign.assignShard(collectionName, 
prevState, numShards); + sliceName = Assign.assignShard(collection, numShards); log.info("Assigning new node to shard shard=" + sliceName); } - Slice slice = prevState.getSlice(collectionName, sliceName); + Slice slice = collection != null ? collection.getSlice(sliceName) : null; Map replicaProps = new LinkedHashMap<>(); @@ -313,9 +316,7 @@ public class ReplicaMutator { Map sliceProps = null; Map replicas; - DocCollection collection = prevState.getCollectionOrNull(collectionName); if (slice != null) { - collection = prevState.getCollection(collectionName); collection = checkAndCompleteShardSplit(prevState, collection, coreNodeName, sliceName, replica); // get the current slice again because it may have been updated due to checkAndCompleteShardSplit method slice = collection.getSlice(sliceName); @@ -340,15 +341,16 @@ public class ReplicaMutator { * Handles non-legacy state updates */ protected ZkWriteCommand updateStateNew(ClusterState clusterState, final ZkNodeProps message) { - String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); if (!checkCollectionKeyExistence(message)) return ZkStateWriter.NO_OP; String sliceName = message.getStr(ZkStateReader.SHARD_ID_PROP); - if (collection == null || sliceName == null) { + if (collectionName == null || sliceName == null) { log.error("Invalid collection and slice {}", message); return ZkStateWriter.NO_OP; } - Slice slice = clusterState.getSlice(collection, sliceName); + DocCollection collection = clusterState.getCollectionOrNull(collectionName); + Slice slice = collection != null ? 
collection.getSlice(sliceName) : null; if (slice == null) { log.error("No such slice exists {}", message); return ZkStateWriter.NO_OP; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java index 63c37effe48..836d0146b13 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java @@ -69,7 +69,7 @@ public class SliceMutator { return ZkStateWriter.NO_OP; } - String coreNodeName = Assign.assignNode(coll, clusterState); + String coreNodeName = Assign.assignNode(collection); Replica replica = new Replica(coreNodeName, makeMap( ZkStateReader.CORE_NAME_PROP, message.getStr(ZkStateReader.CORE_NAME_PROP), @@ -149,18 +149,19 @@ public class SliceMutator { } public ZkWriteCommand updateShardState(ClusterState clusterState, ZkNodeProps message) { - String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); if (!checkCollectionKeyExistence(message)) return ZkStateWriter.NO_OP; - log.info("Update shard state invoked for collection: " + collection + " with message: " + message); + log.info("Update shard state invoked for collection: " + collectionName + " with message: " + message); - Map slicesCopy = new LinkedHashMap<>(clusterState.getSlicesMap(collection)); + DocCollection collection = clusterState.getCollection(collectionName); + Map slicesCopy = new LinkedHashMap<>(collection.getSlicesMap()); for (String key : message.keySet()) { if (ZkStateReader.COLLECTION_PROP.equals(key)) continue; if (Overseer.QUEUE_OPERATION.equals(key)) continue; - Slice slice = clusterState.getSlice(collection, key); + Slice slice = collection.getSlice(key); if (slice == null) { - throw new RuntimeException("Overseer.updateShardState unknown collection: " + collection + " slice: " + key); + throw new 
RuntimeException("Overseer.updateShardState unknown collection: " + collectionName + " slice: " + key); } log.info("Update shard state " + key + " to " + message.getStr(key)); Map props = slice.shallowCopy(); @@ -174,11 +175,11 @@ public class SliceMutator { slicesCopy.put(slice.getName(), newSlice); } - return new ZkWriteCommand(collection, clusterState.getCollection(collection).copyWithSlices(slicesCopy)); + return new ZkWriteCommand(collectionName, collection.copyWithSlices(slicesCopy)); } public ZkWriteCommand addRoutingRule(final ClusterState clusterState, ZkNodeProps message) { - String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); if (!checkCollectionKeyExistence(message)) return ZkStateWriter.NO_OP; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKey = message.getStr("routeKey"); @@ -187,9 +188,10 @@ public class SliceMutator { String targetShard = message.getStr("targetShard"); String expireAt = message.getStr("expireAt"); - Slice slice = clusterState.getSlice(collection, shard); + DocCollection collection = clusterState.getCollection(collectionName); + Slice slice = collection.getSlice(shard); if (slice == null) { - throw new RuntimeException("Overseer.addRoutingRule unknown collection: " + collection + " slice:" + shard); + throw new RuntimeException("Overseer.addRoutingRule unknown collection: " + collectionName + " slice:" + shard); } Map routingRules = slice.getRoutingRules(); @@ -215,22 +217,23 @@ public class SliceMutator { props.put("routingRules", routingRules); Slice newSlice = new Slice(slice.getName(), slice.getReplicasCopy(), props); - return new ZkWriteCommand(collection, - CollectionMutator.updateSlice(collection, clusterState.getCollection(collection), newSlice)); + return new ZkWriteCommand(collectionName, + CollectionMutator.updateSlice(collectionName, collection, newSlice)); } public ZkWriteCommand removeRoutingRule(final 
ClusterState clusterState, ZkNodeProps message) { - String collection = message.getStr(ZkStateReader.COLLECTION_PROP); + String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP); if (!checkCollectionKeyExistence(message)) return ZkStateWriter.NO_OP; String shard = message.getStr(ZkStateReader.SHARD_ID_PROP); String routeKeyStr = message.getStr("routeKey"); - log.info("Overseer.removeRoutingRule invoked for collection: " + collection + log.info("Overseer.removeRoutingRule invoked for collection: " + collectionName + " shard: " + shard + " routeKey: " + routeKeyStr); - Slice slice = clusterState.getSlice(collection, shard); + DocCollection collection = clusterState.getCollection(collectionName); + Slice slice = collection.getSlice(shard); if (slice == null) { - log.warn("Unknown collection: " + collection + " shard: " + shard); + log.warn("Unknown collection: " + collectionName + " shard: " + shard); return ZkStateWriter.NO_OP; } Map routingRules = slice.getRoutingRules(); @@ -239,8 +242,8 @@ public class SliceMutator { Map props = slice.shallowCopy(); props.put("routingRules", routingRules); Slice newSlice = new Slice(slice.getName(), slice.getReplicasCopy(), props); - return new ZkWriteCommand(collection, - CollectionMutator.updateSlice(collection, clusterState.getCollection(collection), newSlice)); + return new ZkWriteCommand(collectionName, + CollectionMutator.updateSlice(collectionName, collection, newSlice)); } return ZkStateWriter.NO_OP; diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java index ec67ed7dbc4..e9edef1b4eb 100644 --- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java +++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java @@ -18,6 +18,7 @@ package org.apache.solr.cloud.overseer; import java.lang.invoke.MethodHandles; import java.util.HashMap; +import java.util.List; import java.util.Map; import 
java.util.Set; import java.util.concurrent.TimeUnit; @@ -227,8 +228,8 @@ public class ZkStateWriter { } else if (c.getStateFormat() > 1) { byte[] data = Utils.toJSON(singletonMap(c.getName(), c)); if (reader.getZkClient().exists(path, true)) { - assert c.getZNodeVersion() >= 0; log.info("going to update_collection {} version: {}", path, c.getZNodeVersion()); + assert c.getZNodeVersion() >= 0; Stat stat = reader.getZkClient().setData(path, data, c.getZNodeVersion(), true); DocCollection newCollection = new DocCollection(name, c.getSlicesMap(), c.getProperties(), c.getRouter(), stat.getVersion(), path); clusterState = clusterState.copyWith(name, newCollection); diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index bb0cd059852..b94b3d8e17a 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -55,6 +55,7 @@ import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.cloud.ZkSolrResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.params.CommonParams; @@ -834,8 +835,8 @@ public final class SolrCore implements SolrInfoMBean, Closeable { // ZK pre-register would have already happened so we read slice properties now final ClusterState clusterState = cc.getZkController().getClusterState(); - final Slice slice = clusterState.getSlice(coreDescriptor.getCloudDescriptor().getCollectionName(), - coreDescriptor.getCloudDescriptor().getShardId()); + final DocCollection collection = clusterState.getCollection(coreDescriptor.getCloudDescriptor().getCollectionName()); + final Slice slice = collection.getSlice(coreDescriptor.getCloudDescriptor().getShardId()); if (slice.getState() == 
Slice.State.CONSTRUCTION) { // set update log to buffer before publishing the core getUpdateHandler().getUpdateLog().bufferUpdates(); diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrUpdateLogSynchronizer.java b/solr/core/src/java/org/apache/solr/handler/CdcrUpdateLogSynchronizer.java index 7071908e70f..48bfec0c379 100644 --- a/solr/core/src/java/org/apache/solr/handler/CdcrUpdateLogSynchronizer.java +++ b/solr/core/src/java/org/apache/solr/handler/CdcrUpdateLogSynchronizer.java @@ -28,6 +28,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.params.CommonParams; @@ -113,7 +114,8 @@ class CdcrUpdateLogSynchronizer implements CdcrStateManager.CdcrStateObserver { private String getLeaderUrl() { ZkController zkController = core.getCoreDescriptor().getCoreContainer().getZkController(); ClusterState cstate = zkController.getClusterState(); - ZkNodeProps leaderProps = cstate.getLeader(collection, shardId); + DocCollection docCollection = cstate.getCollection(collection); + ZkNodeProps leaderProps = docCollection.getLeader(shardId); if (leaderProps == null) { // we might not have a leader yet, returns null return null; } diff --git a/solr/core/src/java/org/apache/solr/handler/SQLHandler.java b/solr/core/src/java/org/apache/solr/handler/SQLHandler.java index aa400461f00..c26c5a8db5e 100644 --- a/solr/core/src/java/org/apache/solr/handler/SQLHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/SQLHandler.java @@ -55,6 +55,7 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import 
org.apache.solr.client.solrj.io.stream.metrics.*; import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 64b10ab0c0f..edafc54fb55 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -425,7 +425,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission ClusterState clusterState = h.coreContainer.getZkController().getClusterState(); - ZkNodeProps leaderProps = clusterState.getLeader(collection, shard); + DocCollection docCollection = clusterState.getCollection(collection); + ZkNodeProps leaderProps = docCollection.getLeader(shard); ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps); try (HttpSolrClient client = new Builder(nodeProps.getBaseUrl()).build()) { @@ -828,18 +829,15 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission private static void forceLeaderElection(SolrQueryRequest req, CollectionsHandler handler) { ClusterState clusterState = handler.coreContainer.getZkController().getClusterState(); - String collection = req.getParams().required().get(COLLECTION_PROP); + String collectionName = req.getParams().required().get(COLLECTION_PROP); String sliceId = req.getParams().required().get(SHARD_ID_PROP); log.info("Force leader invoked, state: {}", clusterState); - Slice slice = clusterState.getSlice(collection, sliceId); + DocCollection collection = clusterState.getCollection(collectionName); + Slice slice = collection.getSlice(sliceId); if (slice == null) { - if (clusterState.hasCollection(collection)) { - throw new 
SolrException(ErrorCode.BAD_REQUEST, - "No shard with name " + sliceId + " exists for collection " + collection); - } else { - throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collection); - } + throw new SolrException(ErrorCode.BAD_REQUEST, + "No shard with name " + sliceId + " exists for collection " + collectionName); } try { @@ -851,7 +849,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission } // Clear out any LIR state - String lirPath = handler.coreContainer.getZkController().getLeaderInitiatedRecoveryZnodePath(collection, sliceId); + String lirPath = handler.coreContainer.getZkController().getLeaderInitiatedRecoveryZnodePath(collectionName, sliceId); if (handler.coreContainer.getZkController().getZkClient().exists(lirPath, true)) { StringBuilder sb = new StringBuilder(); handler.coreContainer.getZkController().getZkClient().printLayout(lirPath, 4, sb); @@ -880,7 +878,8 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission for (int i = 0; i < 9; i++) { Thread.sleep(5000); clusterState = handler.coreContainer.getZkController().getClusterState(); - slice = clusterState.getSlice(collection, sliceId); + collection = clusterState.getCollection(collectionName); + slice = collection.getSlice(sliceId); if (slice.getLeader() != null && slice.getLeader().getState() == State.ACTIVE) { success = true; break; @@ -889,15 +888,15 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission } if (success) { - log.info("Successfully issued FORCELEADER command for collection: {}, shard: {}", collection, sliceId); + log.info("Successfully issued FORCELEADER command for collection: {}, shard: {}", collectionName, sliceId); } else { - log.info("Couldn't successfully force leader, collection: {}, shard: {}. Cluster state: {}", collection, sliceId, clusterState); + log.info("Couldn't successfully force leader, collection: {}, shard: {}. 
Cluster state: {}", collectionName, sliceId, clusterState); } } catch (SolrException e) { throw e; } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, - "Error executing FORCELEADER operation for collection: " + collection + " shard: " + sliceId, e); + "Error executing FORCELEADER operation for collection: " + collectionName + " shard: " + sliceId, e); } } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java index e755b82ff49..bdc916892c1 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java @@ -369,7 +369,7 @@ enum CoreAdminOperation { String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(); DocCollection collection = clusterState.getCollection(collectionName); String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId(); - Slice slice = clusterState.getSlice(collectionName, sliceName); + Slice slice = collection.getSlice(sliceName); router = collection.getRouter() != null ? 
collection.getRouter() : DocRouter.DEFAULT; if (ranges == null) { DocRouter.Range currentRange = slice.getRange(); @@ -461,7 +461,7 @@ enum CoreAdminOperation { // to accept updates CloudDescriptor cloudDescriptor = core.getCoreDescriptor() .getCloudDescriptor(); - String collection = cloudDescriptor.getCollectionName(); + String collectionName = cloudDescriptor.getCollectionName(); if (retry % 15 == 0) { if (retry > 0 && log.isInfoEnabled()) @@ -471,7 +471,7 @@ enum CoreAdminOperation { waitForState + "; forcing ClusterState update from ZooKeeper"); // force a cluster state update - coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collection); + coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName); } if (maxTries == 0) { @@ -484,7 +484,8 @@ enum CoreAdminOperation { } ClusterState clusterState = coreContainer.getZkController().getClusterState(); - Slice slice = clusterState.getSlice(collection, cloudDescriptor.getShardId()); + DocCollection collection = clusterState.getCollection(collectionName); + Slice slice = collection.getSlice(cloudDescriptor.getShardId()); if (slice != null) { final Replica replica = slice.getReplicasMap().get(coreNodeName); if (replica != null) { @@ -508,7 +509,7 @@ enum CoreAdminOperation { } boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive && localState != Replica.State.ACTIVE; - log.info("In WaitForState(" + waitForState + "): collection=" + collection + ", shard=" + slice.getName() + + log.info("In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() + ", thisCore=" + core.getName() + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery + ", isLeader? 
" + core.getCoreDescriptor().getCloudDescriptor().isLeader() + ", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString() + ", localState=" + localState + ", nodeName=" + nodeName + diff --git a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java index eb90762c1e9..6ba571af141 100644 --- a/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java +++ b/solr/core/src/java/org/apache/solr/servlet/HttpSolrCall.java @@ -66,6 +66,7 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkNodeProps; @@ -756,11 +757,15 @@ public class HttpSolrCall { return result; } - private SolrCore getCoreByCollection(String collection) { + private SolrCore getCoreByCollection(String collectionName) { ZkStateReader zkStateReader = cores.getZkController().getZkStateReader(); ClusterState clusterState = zkStateReader.getClusterState(); - Map slices = clusterState.getActiveSlicesMap(collection); + DocCollection collection = clusterState.getCollectionOrNull(collectionName); + if (collection == null) { + return null; + } + Map slices = collection.getActiveSlicesMap(); if (slices == null) { return null; } @@ -773,7 +778,7 @@ public class HttpSolrCall { //For queries it doesn't matter and hence we don't distinguish here. 
for (Map.Entry entry : entries) { // first see if we have the leader - Replica leaderProps = clusterState.getLeader(collection, entry.getKey()); + Replica leaderProps = collection.getLeader(entry.getKey()); if (leaderProps != null && liveNodes.contains(leaderProps.getNodeName()) && leaderProps.getState() == Replica.State.ACTIVE) { core = checkProps(leaderProps); if (core != null) { diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java index 5f4e4f1dd66..67c88ddb9f7 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java @@ -547,8 +547,8 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { throw new SolrException(ErrorCode.SERVER_ERROR, "No active slices serving " + id + " found for target collection: " + rule.getTargetCollectionName()); } - Replica targetLeader = cstate.getLeader(rule.getTargetCollectionName(), activeSlices.iterator().next().getName()); - if (nodes == null) nodes = new ArrayList<>(1); + Replica targetLeader = targetColl.getLeader(activeSlices.iterator().next().getName()); + nodes = new ArrayList<>(1); nodes.add(new StdNode(new ZkCoreNodeProps(targetLeader))); break; } @@ -596,7 +596,8 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { ClusterState clusterState = zkController.getClusterState(); CloudDescriptor cloudDescriptor = req.getCore().getCoreDescriptor().getCloudDescriptor(); - Slice mySlice = clusterState.getSlice(collection, cloudDescriptor.getShardId()); + DocCollection docCollection = clusterState.getCollection(collection); + Slice mySlice = docCollection.getSlice(cloudDescriptor.getShardId()); boolean localIsLeader = cloudDescriptor.isLeader(); if (DistribPhase.FROMLEADER == phase && localIsLeader && from != null) { // from will be 
null on log replay String fromShard = req.getParams().get(DISTRIB_FROM_PARENT); @@ -606,7 +607,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { "Request says it is coming from parent shard leader but we are in active state"); } // shard splitting case -- check ranges to see if we are a sub-shard - Slice fromSlice = zkController.getClusterState().getCollection(collection).getSlice(fromShard); + Slice fromSlice = docCollection.getSlice(fromShard); DocRouter.Range parentRange = fromSlice.getRange(); if (parentRange == null) parentRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE); if (mySlice.getRange() != null && !mySlice.getRange().isSubsetOf(parentRange)) { diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 9627671faeb..d21400d61e1 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -88,6 +88,7 @@ import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkCoreNodeProps; diff --git a/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java b/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java index 8b1ca1fc800..b79ec0c678b 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java +++ b/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java @@ -28,6 +28,7 @@ import org.apache.log4j.spi.ThrowableInformation; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrException; import org.apache.solr.common.StringUtils; +import org.apache.solr.common.cloud.DocCollection; import 
org.apache.solr.common.cloud.Replica; import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.core.SolrCore; @@ -236,10 +237,11 @@ public class SolrLogLayout extends Layout { return sb.toString(); } - private Map getReplicaProps(ZkController zkController, SolrCore core) { - final String collection = core.getCoreDescriptor().getCloudDescriptor().getCollectionName(); - Replica replica = zkController.getClusterState().getReplica(collection, zkController.getCoreNodeName(core.getCoreDescriptor())); - if(replica!=null) { + private Map getReplicaProps(ZkController zkController, SolrCore core) { + final String collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName(); + DocCollection collection = zkController.getClusterState().getCollectionOrNull(collectionName); + Replica replica = collection.getReplica(zkController.getCoreNodeName(core.getCoreDescriptor())); + if (replica != null) { return replica.getProperties(); } return Collections.EMPTY_MAP; diff --git a/solr/core/src/test/org/apache/solr/cloud/AssignTest.java b/solr/core/src/test/org/apache/solr/cloud/AssignTest.java index 6069650e1b7..7593f3b32ed 100644 --- a/solr/core/src/test/org/apache/solr/cloud/AssignTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/AssignTest.java @@ -82,7 +82,7 @@ public class AssignTest extends SolrTestCaseJ4 { Set liveNodes = new HashSet<>(); ClusterState state = new ClusterState(-1,liveNodes, collectionStates); - String nodeName = Assign.assignNode("collection1", state); + String nodeName = Assign.assignNode(state.getCollection("collection1")); assertEquals("core_node2", nodeName); } diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java index accc36a6ed5..7dd77d96a75 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java +++ 
b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPIDistributedZkTest.java @@ -1193,12 +1193,12 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa String collectionName = "addReplicaColl"; try (CloudSolrClient client = createCloudClient(null)) { createCollection(collectionName, client, 2, 2); - String newReplicaName = Assign.assignNode(collectionName, client.getZkStateReader().getClusterState()); + String newReplicaName = Assign.assignNode(client.getZkStateReader().getClusterState().getCollection(collectionName)); ArrayList nodeList = new ArrayList<>(client.getZkStateReader().getClusterState().getLiveNodes()); Collections.shuffle(nodeList, random()); Replica newReplica = doAddReplica(collectionName, "shard1", - Assign.assignNode(collectionName, client.getZkStateReader().getClusterState()), + Assign.assignNode(client.getZkStateReader().getClusterState().getCollection(collectionName)), nodeList.get(0), client, null); log.info("newReplica {},\n{} ", newReplica, client.getZkStateReader().getBaseUrlForNodeName(nodeList.get(0))); @@ -1210,7 +1210,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa String instancePathStr = createTempDir().toString(); props.put(CoreAdminParams.INSTANCE_DIR, instancePathStr); //Use name via the property.instanceDir method newReplica = doAddReplica(collectionName, "shard2", - Assign.assignNode(collectionName, client.getZkStateReader().getClusterState()), + Assign.assignNode(client.getZkStateReader().getClusterState().getCollection(collectionName)), null, client, props); assertNotNull(newReplica); @@ -1244,7 +1244,7 @@ public class CollectionsAPIDistributedZkTest extends AbstractFullDistribZkTestBa props.put(CoreAdminParams.NAME, "propertyDotName"); newReplica = doAddReplica(collectionName, "shard1", - Assign.assignNode(collectionName, client.getZkStateReader().getClusterState()), + 
Assign.assignNode(client.getZkStateReader().getClusterState().getCollection(collectionName)), nodeList.get(0), client, props); assertEquals("'core' should be 'propertyDotName' ", "propertyDotName", newReplica.getStr("core")); } diff --git a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java index 52ae96f0487..0975b9aae2e 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java @@ -276,7 +276,7 @@ public class CollectionsAPISolrJTest extends AbstractFullDistribZkTestBase { cloudClient.setDefaultCollection(collectionName); - String newReplicaName = Assign.assignNode(collectionName, cloudClient.getZkStateReader().getClusterState()); + String newReplicaName = Assign.assignNode(cloudClient.getZkStateReader().getClusterState().getCollection(collectionName)); ArrayList nodeList = new ArrayList<>(cloudClient.getZkStateReader().getClusterState().getLiveNodes()); Collections.shuffle(nodeList, random()); CollectionAdminRequest.AddReplica addReplica = new CollectionAdminRequest.AddReplica() diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index 85a88ec3ae9..7863899504c 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -687,7 +688,7 @@ public class OverseerTest extends SolrTestCaseJ4 { while (version == getClusterStateVersion(zkClient)); Thread.sleep(500); assertTrue(collection+" should remain after removal of the last core", // as of SOLR-5209 core removal does not cascade to remove the slice and 
collection - reader.getClusterState().getCollections().contains(collection)); + reader.getClusterState().hasCollection(collection)); assertTrue(core_node+" should be gone after publishing the null state", null == reader.getClusterState().getCollection(collection).getReplica(core_node)); } finally { diff --git a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java index 9eca6db26b8..893b6febd90 100644 --- a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java +++ b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java @@ -30,6 +30,7 @@ import org.apache.solr.client.solrj.io.stream.ExceptionStream; import org.apache.solr.client.solrj.io.stream.SolrStream; import org.apache.solr.client.solrj.io.stream.TupleStream; import org.apache.solr.cloud.AbstractFullDistribZkTestBase; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.params.CommonParams; import org.junit.After; import org.junit.AfterClass; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java index 1a6139ee427..97a804d176d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TopicStream.java @@ -51,6 +51,7 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkCoreNodeProps; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java index 2495c41061a..d60cccb3a0b 100644 
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterState.java @@ -90,7 +90,9 @@ public class ClusterState implements JSONWriter.Writable { /** * Get the lead replica for specific collection, or null if one currently doesn't exist. + * @deprecated Use {@link DocCollection#getLeader(String)} instead */ + @Deprecated public Replica getLeader(String collection, String sliceName) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; @@ -98,14 +100,6 @@ public class ClusterState implements JSONWriter.Writable { if (slice == null) return null; return slice.getLeader(); } - private Replica getReplica(DocCollection coll, String replicaName) { - if (coll == null) return null; - for (Slice slice : coll.getSlices()) { - Replica replica = slice.getReplica(replicaName); - if (replica != null) return replica; - } - return null; - } /** * Returns true if the specified collection name exists, false otherwise. @@ -113,48 +107,76 @@ public class ClusterState implements JSONWriter.Writable { * Implementation note: This method resolves the collection reference by calling * {@link CollectionRef#get()} which can make a call to ZooKeeper. This is necessary * because the semantics of how collection list is loaded have changed in SOLR-6629. - * Please javadocs in {@link ZkStateReader#refreshCollectionList(Watcher)} + * Please see javadocs in {@link ZkStateReader#refreshCollectionList(Watcher)} */ public boolean hasCollection(String collectionName) { return getCollectionOrNull(collectionName) != null; } /** - * Gets the replica by the core name (assuming the slice is unknown) or null if replica is not found. + * Gets the replica by the core node name (assuming the slice is unknown) or null if replica is not found. * If the slice is known, do not use this method. 
* coreNodeName is the same as replicaName + * + * @deprecated use {@link DocCollection#getReplica(String)} instead */ + @Deprecated public Replica getReplica(final String collection, final String coreNodeName) { - return getReplica(getCollectionOrNull(collection), coreNodeName); + DocCollection coll = getCollectionOrNull(collection); + if (coll == null) return null; + for (Slice slice : coll.getSlices()) { + Replica replica = slice.getReplica(coreNodeName); + if (replica != null) return replica; + } + return null; } /** * Get the named Slice for collection, or null if not found. + * + * @deprecated use {@link DocCollection#getSlice(String)} instead */ + @Deprecated public Slice getSlice(String collection, String sliceName) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; return coll.getSlice(sliceName); } + /** + * @deprecated use {@link DocCollection#getSlicesMap()} instead + */ + @Deprecated public Map getSlicesMap(String collection) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; return coll.getSlicesMap(); } - + + /** + * @deprecated use {@link DocCollection#getActiveSlicesMap()} instead + */ + @Deprecated public Map getActiveSlicesMap(String collection) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; return coll.getActiveSlicesMap(); } + /** + * @deprecated use {@link DocCollection#getSlices()} instead + */ + @Deprecated public Collection getSlices(String collection) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; return coll.getSlices(); } + /** + * @deprecated use {@link DocCollection#getActiveSlices()} instead + */ + @Deprecated public Collection getActiveSlices(String collection) { DocCollection coll = getCollectionOrNull(collection); if (coll == null) return null; @@ -195,7 +217,7 @@ public class ClusterState implements JSONWriter.Writable { * Implementation note: This method resolves the collection 
reference by calling * {@link CollectionRef#get()} which can make a call to ZooKeeper. This is necessary * because the semantics of how collection list is loaded have changed in SOLR-6629. - * Please javadocs in {@link ZkStateReader#refreshCollectionList(Watcher)} + * Please see javadocs in {@link ZkStateReader#refreshCollectionList(Watcher)} */ public Set getCollections() { Set result = new HashSet<>(); diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java index 1628756a66f..a298cb3ce8d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java @@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java index e8f26e1a4cc..9e4418c16f7 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/DocCollection.java @@ -209,4 +209,10 @@ public class DocCollection extends ZkNodeProps { } return null; } + + public Replica getLeader(String sliceName) { + Slice slice = getSlice(sliceName); + if (slice == null) return null; + return slice.getLeader(); + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index c6f88c06b4b..568c791af4a 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -646,7 +646,8 @@ public class ZkStateReader implements Closeable { public Replica getLeader(String collection, String shard) 
{ if (clusterState != null) { - Replica replica = clusterState.getLeader(collection, shard); + DocCollection docCollection = clusterState.getCollectionOrNull(collection); + Replica replica = docCollection != null ? docCollection.getLeader(shard) : null; if (replica != null && getClusterState().liveNodesContain(replica.getNodeName())) { return replica; } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java index 90500927f26..d38661e9e95 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.cloud.AbstractFullDistribZkTestBase; import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.common.cloud.DocCollection; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; From 89c65af2a6e5f1c8216c1202f65e8d670ef14385 Mon Sep 17 00:00:00 2001 From: Scott Blum Date: Mon, 25 Apr 2016 21:15:02 -0400 Subject: [PATCH 17/18] SOLR-9029: fix rare ZkStateReader visibility race during collection state format update --- solr/CHANGES.txt | 2 ++ .../java/org/apache/solr/common/cloud/ZkStateReader.java | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 999bd737fd9..77029502213 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -150,6 +150,8 @@ Bug Fixes * SOLR-8992: Restore Schema API GET method functionality removed in 6.0 (noble, Steve Rowe) +* SOLR-9029: fix rare ZkStateReader visibility race during collection state format update (Scott Blum, hossman) + Optimizations ---------------------- * SOLR-8722: Don't force a full ZkStateReader refresh on every Overseer operation. 
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 568c791af4a..1e57d7eda41 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -263,9 +263,9 @@ public class ZkStateReader implements Closeable { } ClusterState.CollectionRef ref = clusterState.getCollectionRef(collection); - if (ref == null) { - // We don't know anything about this collection, maybe it's new? - // First try to update the legacy cluster state. + if (ref == null || legacyCollectionStates.containsKey(collection)) { + // We either don't know anything about this collection (maybe it's new?) or it's legacy. + // First update the legacy cluster state. refreshLegacyClusterState(null); if (!legacyCollectionStates.containsKey(collection)) { // No dice, see if a new collection just got created. @@ -283,9 +283,6 @@ public class ZkStateReader implements Closeable { } // Edge case: if there's no external collection, try refreshing legacy cluster state in case it's there. refreshLegacyClusterState(null); - } else if (legacyCollectionStates.containsKey(collection)) { - // Exists, and lives in legacy cluster state, force a refresh. - refreshLegacyClusterState(null); } else if (watchedCollectionStates.containsKey(collection)) { // Exists as a watched collection, force a refresh. 
DocCollection newState = fetchCollectionState(collection, null); From 6fa5166e41652fc58a5f18db4796e230b1354dbd Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 26 Apr 2016 09:16:27 -0400 Subject: [PATCH 18/18] LUCENE-7254: (sandbox/ only) Don't let abuse cases slow down spatial queries --- .../apache/lucene/document/LatLonPoint.java | 3 +- .../lucene/document/LatLonPointBoxQuery.java | 287 ++++++++++++++++++ .../document/LatLonPointDistanceQuery.java | 17 +- .../document/LatLonPointInPolygonQuery.java | 18 +- .../lucene/document/MatchingPoints.java | 90 ++++++ 5 files changed, 382 insertions(+), 33 deletions(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index 426a7022a6a..a63e4bd0c8c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -34,7 +34,6 @@ import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopFieldDocs; @@ -229,7 +228,7 @@ public class LatLonPoint extends Field { } private static Query newBoxInternal(String field, byte[] min, byte[] max) { - return new PointRangeQuery(field, min, max, 2) { + return new LatLonPointBoxQuery(field, min, max, 2) { @Override protected String toString(int dimension, byte[] value) { if (dimension == 0) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java 
b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java new file mode 100644 index 00000000000..423af05f57e --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointBoxQuery.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; + +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.StringHelper; + +/** + * Fast version of {@link PointRangeQuery}. It is fast for actual range queries! 
+ * @lucene.experimental + */ +abstract class LatLonPointBoxQuery extends Query { + final String field; + final int numDims; + final int bytesPerDim; + final byte[] lowerPoint; + final byte[] upperPoint; + + /** + * Expert: create a multidimensional range query for point values. + * + * @param field field name. must not be {@code null}. + * @param lowerPoint lower portion of the range (inclusive). + * @param upperPoint upper portion of the range (inclusive). + * @param numDims number of dimensions. + * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != upperValue.length} + */ + protected LatLonPointBoxQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) { + checkArgs(field, lowerPoint, upperPoint); + this.field = field; + if (numDims <= 0) { + throw new IllegalArgumentException("numDims must be positive, got " + numDims); + } + if (lowerPoint.length == 0) { + throw new IllegalArgumentException("lowerPoint has length of zero"); + } + if (lowerPoint.length % numDims != 0) { + throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims"); + } + if (lowerPoint.length != upperPoint.length) { + throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length); + } + this.numDims = numDims; + this.bytesPerDim = lowerPoint.length / numDims; + + this.lowerPoint = lowerPoint; + this.upperPoint = upperPoint; + } + + /** + * Check preconditions for all factory methods + * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null. 
+ */ + public static void checkArgs(String field, Object lowerPoint, Object upperPoint) { + if (field == null) { + throw new IllegalArgumentException("field must not be null"); + } + if (lowerPoint == null) { + throw new IllegalArgumentException("lowerPoint must not be null"); + } + if (upperPoint == null) { + throw new IllegalArgumentException("upperPoint must not be null"); + } + } + + @Override + public final Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + + // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". + // This is an inverted structure and should be used in the first pass: + + return new ConstantScoreWeight(this) { + + private DocIdSetIterator buildMatchingIterator(LeafReader reader, PointValues values) throws IOException { + MatchingPoints result = new MatchingPoints(reader, field); + + values.intersect(field, + new IntersectVisitor() { + + @Override + public void visit(int docID) { + result.add(docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + for(int dim=0;dim 0) { + // Doc's value is too high, in this dimension + return; + } + } + + // Doc is in-bounds + result.add(docID); + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + + boolean crosses = false; + + for(int dim=0;dim 0 || + StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) { + return Relation.CELL_OUTSIDE_QUERY; + } + + crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || + StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0; + } + + if (crosses) { + return Relation.CELL_CROSSES_QUERY; + } else { + return Relation.CELL_INSIDE_QUERY; + } + } + }); + return result.iterator(); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + LeafReader reader = context.reader(); + PointValues values = 
reader.getPointValues(); + if (values == null) { + // No docs in this segment indexed any points + return null; + } + FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if (fieldInfo == null) { + // No docs in this segment indexed this field at all + return null; + } + if (fieldInfo.getPointDimensionCount() != numDims) { + throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=" + numDims); + } + if (bytesPerDim != fieldInfo.getPointNumBytes()) { + throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim); + } + + boolean allDocsMatch; + if (values.getDocCount(field) == reader.maxDoc()) { + final byte[] fieldPackedLower = values.getMinPackedValue(field); + final byte[] fieldPackedUpper = values.getMaxPackedValue(field); + allDocsMatch = true; + for (int i = 0; i < numDims; ++i) { + int offset = i * bytesPerDim; + if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0 + || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) { + allDocsMatch = false; + break; + } + } + } else { + allDocsMatch = false; + } + + DocIdSetIterator iterator; + if (allDocsMatch) { + // all docs have a value and all points are within bounds, so everything matches + iterator = DocIdSetIterator.all(reader.maxDoc()); + } else { + iterator = buildMatchingIterator(reader, values); + } + + return new ConstantScoreScorer(this, score(), iterator); + } + }; + } + + @Override + public final int hashCode() { + int hash = super.hashCode(); + hash = 31 * hash + field.hashCode(); + hash = 31 * hash + Arrays.hashCode(lowerPoint); + hash = 31 * hash + Arrays.hashCode(upperPoint); + hash = 31 * hash + numDims; + hash = 31 * hash + Objects.hashCode(bytesPerDim); + return hash; + } + + @Override + public final boolean 
equals(Object other) { + if (super.equals(other) == false) { + return false; + } + + final LatLonPointBoxQuery q = (LatLonPointBoxQuery) other; + if (field.equals(q.field) == false) { + return false; + } + + if (q.numDims != numDims) { + return false; + } + + if (q.bytesPerDim != bytesPerDim) { + return false; + } + + if (Arrays.equals(lowerPoint, q.lowerPoint) == false) { + return false; + } + + if (Arrays.equals(upperPoint, q.upperPoint) == false) { + return false; + } + + return true; + } + + @Override + public final String toString(String field) { + final StringBuilder sb = new StringBuilder(); + if (this.field.equals(field) == false) { + sb.append(this.field); + sb.append(':'); + } + + // print ourselves as "range per dimension" + for (int i = 0; i < numDims; i++) { + if (i > 0) { + sb.append(','); + } + + int startOffset = bytesPerDim * i; + + sb.append('['); + sb.append(toString(i, Arrays.copyOfRange(lowerPoint, startOffset, startOffset + bytesPerDim))); + sb.append(" TO "); + sb.append(toString(i, Arrays.copyOfRange(upperPoint, startOffset, startOffset + bytesPerDim))); + sb.append(']'); + } + + return sb.toString(); + } + + /** + * Returns a string of a single value in a human-readable format for debugging. + * This is used by {@link #toString()}. 
+ * + * @param dimension dimension of the particular value + * @param value single value, never null + * @return human readable value for debugging + */ + protected abstract String toString(int dimension, byte[] value); +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 9bd78fecbbc..0759ce1f0dc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -28,13 +28,10 @@ import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.SloppyMath; import org.apache.lucene.util.StringHelper; @@ -120,15 +117,10 @@ final class LatLonPointDistanceQuery extends Query { LatLonPoint.checkCompatible(fieldInfo); // matching docids - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + MatchingPoints result = new MatchingPoints(reader, field); values.intersect(field, new IntersectVisitor() { - @Override - public void grow(int count) { - result.grow(count); - } - @Override public void visit(int docID) { result.add(docID); @@ -209,12 +201,7 @@ final class LatLonPointDistanceQuery extends Query { } }); - DocIdSet set = result.build(); - final DocIdSetIterator disi = set.iterator(); - if (disi == null) { - return null; - } - return new ConstantScoreScorer(this, score(), disi); + return 
new ConstantScoreScorer(this, score(), result.iterator()); } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index ee7c1e8c157..506e6b9d5a7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -24,8 +24,6 @@ import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; @@ -34,7 +32,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.StringHelper; import org.apache.lucene.geo.Polygon; @@ -113,15 +110,10 @@ final class LatLonPointInPolygonQuery extends Query { LatLonPoint.checkCompatible(fieldInfo); // matching docids - DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); + MatchingPoints result = new MatchingPoints(reader, field); values.intersect(field, new IntersectVisitor() { - @Override - public void grow(int count) { - result.grow(count); - } - @Override public void visit(int docID) { result.add(docID); @@ -154,13 +146,7 @@ final class LatLonPointInPolygonQuery extends Query { } }); - DocIdSet set = result.build(); - final DocIdSetIterator disi = set.iterator(); - if (disi == null) { - return null; - } - - return new ConstantScoreScorer(this, score(), disi); + return 
new ConstantScoreScorer(this, score(), result.iterator()); } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java new file mode 100644 index 00000000000..2b6c1242de4 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/MatchingPoints.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.SparseFixedBitSet; + +/** + * Accumulates matching hits for points. + *

+ * Add matches with {@link #add(int)} and call {@link #iterator()} for + an iterator over the results. + *

+ * This implementation currently optimizes bitset structure (sparse vs dense) + and {@link DocIdSetIterator#cost()} (cardinality) based on index statistics. + * This API may change as point values evolve. + * + * @lucene.experimental + */ +final class MatchingPoints { + /** bitset we collect into */ + private final BitSet bits; + /** number of documents containing a value for the points field */ + private final int docCount; + /** number of values indexed for the points field */ + private final long numPoints; + /** number of documents in the index segment */ + private final int maxDoc; + /** counter of hits seen */ + private long counter; + + /** + * Creates a new accumulator. + * @param reader reader to collect point matches from + * @param field field name. + */ + public MatchingPoints(LeafReader reader, String field) { + maxDoc = reader.maxDoc(); + PointValues values = reader.getPointValues(); + if (values == null) { + throw new IllegalStateException("the query is missing null checks"); + } + docCount = values.getDocCount(field); + numPoints = values.size(field); + // heuristic: if the field is really sparse, use a sparse impl + if (docCount >= 0 && docCount * 100L < maxDoc) { + bits = new SparseFixedBitSet(maxDoc); + } else { + bits = new FixedBitSet(maxDoc); + } + }

+ * NOTE: doc IDs do not need to be provided in any order. + */ + public void add(int doc) { + bits.set(doc); + counter++; + } + + /** + * Returns an iterator over the recorded matches. + */ + public DocIdSetIterator iterator() { + // if single-valued (docCount == numPoints), then this is exact + // otherwise it's approximate based on field stats + return new BitSetIterator(bits, (long) (counter * (docCount / (double) numPoints))); + } +}