From c4f6e3053e17026fd9393f139ae4b4d18fceb1fa Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sat, 2 Feb 2013 23:20:38 +0000 Subject: [PATCH 01/18] Replace "throw exception" with JUnit's more idiomatic "assertNull", following Uwe's advice. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441831 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/BaseTermVectorsFormatTestCase.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java index c76b9442075..63de145d89f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java @@ -721,9 +721,7 @@ public abstract class BaseTermVectorsFormatTestCase extends LuceneTestCase { reader.close(); writer.close(); dir.close(); - if (exception.get() != null) { - throw new RuntimeException("One thread threw an exception", exception.get()); - } + assertNull("One thread threw an exception", exception.get()); } } From fe719d35689fd2adc3ad4aed635e704ea6cdcba4 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 14:17:42 +0000 Subject: [PATCH 02/18] add timeouts to http calls in SnapPuller git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441913 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/handler/SnapPuller.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java index 08877223f5d..195ffd2d460 100644 --- a/solr/core/src/java/org/apache/solr/handler/SnapPuller.java +++ b/solr/core/src/java/org/apache/solr/handler/SnapPuller.java @@ -73,7 +73,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpSolrServer; @@ -86,8 +85,8 @@ import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.FastInputStream; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CachingDirectoryFactory.CloseListener; -import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.DirectoryFactory; +import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.IndexDeletionPolicyWrapper; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.ReplicationHandler.FileInfo; @@ -244,7 +243,9 @@ public class SnapPuller { params.set(CommonParams.WT, "javabin"); params.set(CommonParams.QT, "/replication"); QueryRequest req = new QueryRequest(params); - SolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX modify to use shardhandler + HttpSolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX modify to use shardhandler + server.setSoTimeout(60000); + server.setConnectionTimeout(15000); try { return server.request(req); } catch (SolrServerException e) { @@ -262,7 +263,9 @@ public class SnapPuller { params.set(CommonParams.WT, "javabin"); params.set(CommonParams.QT, 
"/replication"); QueryRequest req = new QueryRequest(params); - SolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX modify to use shardhandler + HttpSolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX modify to use shardhandler + server.setSoTimeout(60000); + server.setConnectionTimeout(15000); try { NamedList response = server.request(req); @@ -1237,7 +1240,9 @@ public class SnapPuller { * Open a new stream using HttpClient */ FastInputStream getStream() throws IOException { - SolrServer s = new HttpSolrServer(masterUrl, myHttpClient, null); //XXX use shardhandler + HttpSolrServer s = new HttpSolrServer(masterUrl, myHttpClient, null); //XXX use shardhandler + s.setSoTimeout(60000); + s.setConnectionTimeout(15000); ModifiableSolrParams params = new ModifiableSolrParams(); // //the method is command=filecontent @@ -1496,7 +1501,9 @@ public class SnapPuller { * Open a new stream using HttpClient */ FastInputStream getStream() throws IOException { - SolrServer s = new HttpSolrServer(masterUrl, myHttpClient, null); //XXX use shardhandler + HttpSolrServer s = new HttpSolrServer(masterUrl, myHttpClient, null); //XXX use shardhandler + s.setSoTimeout(60000); + s.setConnectionTimeout(15000); ModifiableSolrParams params = new ModifiableSolrParams(); // //the method is command=filecontent @@ -1549,7 +1556,9 @@ public class SnapPuller { params.set(COMMAND, CMD_DETAILS); params.set("slave", false); params.set(CommonParams.QT, "/replication"); - SolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX use shardhandler + HttpSolrServer server = new HttpSolrServer(masterUrl, myHttpClient); //XXX use shardhandler + server.setSoTimeout(60000); + server.setConnectionTimeout(15000); QueryRequest request = new QueryRequest(params); return server.request(request); } From 23edb27440a43c54a5961d7cff0280a48e52efdf Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 18:06:52 +0000 Subject: [PATCH 03/18] drop the connect timeout from 45 sec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441941 13f79535-47bb-0310-9956-ffa450edef68 --- solr/core/src/java/org/apache/solr/cloud/ZkController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index b15e1988fee..39d5c4f4aad 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1220,7 +1220,7 @@ public final class ZkController { if (!isLeader && !SKIP_AUTO_RECOVERY) { HttpSolrServer server = null; server = new HttpSolrServer(leaderBaseUrl); - server.setConnectionTimeout(45000); + server.setConnectionTimeout(15000); server.setSoTimeout(120000); WaitForState prepCmd = new WaitForState(); prepCmd.setCoreName(leaderCoreName); From 3105772ed7bd554cc7802adf312e158e00e875d0 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 18:10:12 +0000 Subject: [PATCH 04/18] tests: add timeouts for http client git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441943 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test/org/apache/solr/cloud/BasicDistributedZkTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index 52bebe624e2..9d1d9ce5a63 100644 --- 
a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -495,6 +495,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { DirectUpdateHandler2.commitOnClose = false; HttpSolrServer addClient = new HttpSolrServer(url3 + "/unloadcollection3"); + addClient.setConnectionTimeout(15000); + addClient.setSoTimeout(30000); // add a few docs for (int x = 20; x < 100; x++) { SolrInputDocument doc1 = getDoc(id, x, i1, -600, tlong, 600, t1, From edf7b89e7054d6a75ca02de709aa2f137b63416a Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 18:23:40 +0000 Subject: [PATCH 05/18] tests: add timeouts for http clients git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441946 13f79535-47bb-0310-9956-ffa450edef68 --- .../solr/cloud/BasicDistributedZkTest.java | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index 9d1d9ce5a63..e80ae622e12 100644 --- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -509,6 +509,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { // unload the leader collectionClient = new HttpSolrServer(leaderProps.getBaseUrl()); + collectionClient.setConnectionTimeout(15000); + collectionClient.setSoTimeout(30000); Unload unloadCmd = new Unload(false); unloadCmd.setCoreName(leaderProps.getCoreName()); @@ -531,6 +533,9 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { zkStateReader.getLeaderRetry("unloadcollection", "shard1", 15000); addClient = new HttpSolrServer(url2 + "/unloadcollection2"); + addClient.setConnectionTimeout(15000); + addClient.setSoTimeout(30000); + // add a few docs while the leader is down for (int x = 101; x < 200; x++) { SolrInputDocument doc1 = getDoc(id, x, i1, -600, tlong, 600, t1, @@ -543,6 +548,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { client = clients.get(3); String url4 = getBaseUrl(client); server = new HttpSolrServer(url4); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); createCmd = new Create(); createCmd.setCoreName("unloadcollection4"); @@ -556,6 +563,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { // unload the leader again leaderProps = getLeaderUrlFromZk("unloadcollection", "shard1"); collectionClient = new HttpSolrServer(leaderProps.getBaseUrl()); + collectionClient.setConnectionTimeout(15000); + collectionClient.setSoTimeout(30000); unloadCmd = new Unload(false); unloadCmd.setCoreName(leaderProps.getCoreName()); @@ -578,6 +587,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { // bring the downed leader back as replica server = new HttpSolrServer(leaderProps.getBaseUrl()); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); createCmd = new Create(); createCmd.setCoreName(leaderProps.getCoreName()); @@ -587,20 +598,23 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { waitForRecoveriesToFinish("unloadcollection", zkStateReader, false); - - server = new HttpSolrServer(url1 + "/unloadcollection"); - // System.out.println(server.query(new SolrQuery("*:*")).getResults().getNumFound()); server = new HttpSolrServer(url2 + 
"/unloadcollection"); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); server.commit(); SolrQuery q = new SolrQuery("*:*"); q.set("distrib", false); long found1 = server.query(q).getResults().getNumFound(); server = new HttpSolrServer(url3 + "/unloadcollection"); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); server.commit(); q = new SolrQuery("*:*"); q.set("distrib", false); long found3 = server.query(q).getResults().getNumFound(); server = new HttpSolrServer(url4 + "/unloadcollection"); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); server.commit(); q = new SolrQuery("*:*"); q.set("distrib", false); @@ -1047,6 +1061,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { // now test that unloading a core gets us a new leader HttpSolrServer server = new HttpSolrServer(baseUrl); + server.setConnectionTimeout(15000); + server.setSoTimeout(30000); Unload unloadCmd = new Unload(true); unloadCmd.setCoreName(props.getCoreName()); From e3343731b16bc12fe84ab9bead09957f692fe337 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 21:33:31 +0000 Subject: [PATCH 06/18] tests: raise so timeout git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441970 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test/org/apache/solr/cloud/BasicDistributedZkTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index e80ae622e12..83b1b5ee796 100644 --- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -672,7 +672,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { String url3 = getBaseUrl(client); final HttpSolrServer server = new HttpSolrServer(url3); server.setConnectionTimeout(15000); - server.setSoTimeout(30000); + server.setSoTimeout(60000); ThreadPoolExecutor executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue(), new DefaultSolrThreadFactory("testExecutor")); From da8488a2da3b90844ddf558af2d561304a07d868 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 3 Feb 2013 22:06:17 +0000 Subject: [PATCH 07/18] tests: raise test pause git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441974 13f79535-47bb-0310-9956-ffa450edef68 --- solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java index dc7d6dc907f..53b0acbdd95 100644 --- a/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java @@ -90,7 +90,7 @@ public class RecoveryZkTest extends AbstractFullDistribZkTestBase { waitForThingsToLevelOut(30); - Thread.sleep(1000); + Thread.sleep(5000); waitForRecoveriesToFinish(DEFAULT_COLLECTION, zkStateReader, false, true); From 5e556813fa18c4603f706077b1921686fb52a2fa Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Mon, 4 Feb 2013 13:18:40 +0000 Subject: [PATCH 08/18] LUCENE-4749 - exposed UIMA AEs config parameters in analysis/uima tools git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442106 13f79535-47bb-0310-9956-ffa450edef68 --- .../analysis/uima/BaseUIMATokenizer.java | 6 
++++-- .../uima/UIMAAnnotationsTokenizer.java | 5 +++-- .../uima/UIMAAnnotationsTokenizerFactory.java | 21 ++++++++++++++----- .../analysis/uima/UIMABaseAnalyzer.java | 7 +++++-- .../analysis/uima/UIMATypeAwareAnalyzer.java | 7 +++++-- .../UIMATypeAwareAnnotationsTokenizer.java | 5 +++-- ...ATypeAwareAnnotationsTokenizerFactory.java | 20 +++++++++++++----- .../test-files/uima/TestEntityAnnotatorAE.xml | 2 +- .../src/test-files/uima/TestWSTokenizerAE.xml | 21 ++++++++++++++++++- .../analysis/uima/UIMABaseAnalyzerTest.java | 14 +++++++++++-- .../uima/UIMATypeAwareAnalyzerTest.java | 4 ++-- .../uima/an/SampleWSTokenizerAnnotator.java | 12 +++++++++-- 12 files changed, 96 insertions(+), 28 deletions(-) diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java index 78758cb7786..212a40d23f9 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java @@ -28,6 +28,8 @@ import org.apache.uima.resource.ResourceInitializationException; import java.io.IOException; import java.io.Reader; +import java.util.HashMap; +import java.util.Map; /** * Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a @@ -39,10 +41,10 @@ public abstract class BaseUIMATokenizer extends Tokenizer { protected final AnalysisEngine ae; protected final CAS cas; - protected BaseUIMATokenizer(Reader reader, String descriptorPath) { + protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map configurationParameters) { super(reader); try { - ae = AEProviderFactory.getInstance().getAEProvider(descriptorPath).getAE(); + ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE(); cas = ae.newCAS(); } catch (ResourceInitializationException e) { throw new RuntimeException(e); diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java index 423a7963aca..19f8c780424 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java @@ -26,6 +26,7 @@ import org.apache.uima.cas.text.AnnotationFS; import java.io.IOException; import java.io.Reader; +import java.util.Map; /** * a {@link Tokenizer} which creates tokens from UIMA Annotations @@ -40,8 +41,8 @@ public final class UIMAAnnotationsTokenizer extends BaseUIMATokenizer { private int finalOffset = 0; - public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) { - super(input, descriptorPath); + public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map configurationParameters, Reader input) { + super(input, descriptorPath, configurationParameters); this.tokenTypeString = tokenType; this.termAttr = addAttribute(CharTermAttribute.class); this.offsetAttr = addAttribute(OffsetAttribute.class); diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java index 6c0ea377fef..a57f54aa3ff 100644 --- 
a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer; import java.io.Reader; +import java.util.HashMap; import java.util.Map; /** @@ -31,19 +32,29 @@ public class UIMAAnnotationsTokenizerFactory extends TokenizerFactory { private String descriptorPath; private String tokenType; + private Map configurationParameters; @Override public void init(Map args) { super.init(args); - descriptorPath = args.get("descriptorPath"); - tokenType = args.get("tokenType"); - if (descriptorPath == null || tokenType == null) { - throw new IllegalArgumentException("Both descriptorPath and tokenType are mandatory"); + configurationParameters = new HashMap(); + for (String k : args.keySet()) { + if (k.equals("tokenType")) { + tokenType = args.get("tokenType"); + } else if (k.equals("descriptorPath")) { + descriptorPath = args.get("descriptorPath"); + } else { + configurationParameters.put(k, args.get(k)); + } } + if (descriptorPath == null || tokenType == null ) { + throw new IllegalArgumentException("descriptorPath and tokenType are mandatory"); + } + } @Override public Tokenizer create(Reader input) { - return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input); + return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input); } } diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java index e7fb4b25621..b0edc70a6c7 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima; import org.apache.lucene.analysis.Analyzer; import java.io.Reader; +import java.util.Map; /** * An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens @@ -28,15 +29,17 @@ public final class UIMABaseAnalyzer extends Analyzer { private final String descriptorPath; private final String tokenType; + private final Map configurationParameters; - public UIMABaseAnalyzer(String descriptorPath, String tokenType) { + public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map configurationParameters) { this.descriptorPath = descriptorPath; this.tokenType = tokenType; + this.configurationParameters = configurationParameters; } @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader)); + return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader)); } } diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java index 930351da377..4cc59ce8038 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima; import org.apache.lucene.analysis.Analyzer; import 
java.io.Reader; +import java.util.Map; /** * {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase @@ -28,15 +29,17 @@ public final class UIMATypeAwareAnalyzer extends Analyzer { private final String descriptorPath; private final String tokenType; private final String featurePath; + private final Map configurationParameters; - public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) { + public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map configurationParameters) { this.descriptorPath = descriptorPath; this.tokenType = tokenType; this.featurePath = featurePath; + this.configurationParameters = configurationParameters; } @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { - return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader)); + return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader)); } } diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java index 281c7d5b838..abdcb84bd9b 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java @@ -29,6 +29,7 @@ import org.apache.uima.cas.text.AnnotationFS; import java.io.IOException; import java.io.Reader; +import java.util.Map; /** * A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to @@ -50,8 +51,8 @@ public final class UIMATypeAwareAnnotationsTokenizer extends BaseUIMATokenizer { private int finalOffset = 0; - public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) { - super(input, descriptorPath); + public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map configurationParameters, Reader input) { + super(input, descriptorPath, configurationParameters); this.tokenTypeString = tokenType; this.termAttr = addAttribute(CharTermAttribute.class); this.typeAttr = addAttribute(TypeAttribute.class); diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java index 031b0341559..265b965a748 100644 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java @@ -18,10 +18,10 @@ package org.apache.lucene.analysis.uima; */ import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import java.io.Reader; +import java.util.HashMap; import java.util.Map; /** @@ -32,13 +32,23 @@ public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory { private String descriptorPath; private String tokenType; private String featurePath; + private Map configurationParameters; @Override public 
void init(Map args) { super.init(args); - descriptorPath = args.get("descriptorPath"); - tokenType = args.get("tokenType"); - featurePath = args.get("featurePath"); + configurationParameters = new HashMap(); + for (String k : args.keySet()) { + if (k.equals("featurePath")) { + featurePath = args.get("featurePath"); + } else if (k.equals("tokenType")) { + tokenType = args.get("tokenType"); + } else if (k.equals("descriptorPath")) { + descriptorPath = args.get("descriptorPath"); + } else { + configurationParameters.put(k, args.get(k)); + } + } if (descriptorPath == null || tokenType == null || featurePath == null) { throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory"); } @@ -46,6 +56,6 @@ public class UIMATypeAwareAnnotationsTokenizerFactory extends TokenizerFactory { @Override public Tokenizer create(Reader input) { - return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input); + return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input); } } diff --git a/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml b/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml index d7ec826fd50..e9122e63182 100644 --- a/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml +++ b/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml @@ -20,7 +20,7 @@ true org.apache.lucene.analysis.uima.an.SampleEntityAnnotator - DummyPoSTagger + EntityAnnotator 1.0 ASF diff --git a/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml b/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml index 596a830eb21..b0624eb011b 100644 --- a/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml +++ b/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml @@ -20,9 +20,28 @@ true org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator - DummyPoSTagger + WSTokenizer 1.0 ASF + + + line-end + + the string used as line end + + String + false + false + + + + + line-end + + \n + + + diff --git a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java index 307e344d13e..7cc749026af 100644 --- a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java +++ b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java @@ -36,6 +36,8 @@ import org.junit.Before; import org.junit.Test; import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; /** * Testcase for {@link UIMABaseAnalyzer} @@ -48,7 +50,7 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase { @Before public void setUp() throws Exception { super.setUp(); - analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation"); + analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null); } @Override @@ -120,7 +122,15 @@ public class UIMABaseAnalyzerTest extends BaseTokenStreamTestCase { @Test public void testRandomStrings() throws Exception { - checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"), + checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null), + 100 * RANDOM_MULTIPLIER); + } + + @Test + public void 
testRandomStringsWithConfigurationParameters() throws Exception { + Map cp = new HashMap(); + cp.put("line-end", "\r"); + checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp), 100 * RANDOM_MULTIPLIER); } diff --git a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java index 50faccbadb9..d595c9a2250 100644 --- a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java +++ b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java @@ -37,7 +37,7 @@ public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase { public void setUp() throws Exception { super.setUp(); analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml", - "org.apache.uima.TokenAnnotation", "posTag"); + "org.apache.uima.TokenAnnotation", "posTag", null); } @Override @@ -63,7 +63,7 @@ public class UIMATypeAwareAnalyzerTest extends BaseTokenStreamTestCase { @Test public void testRandomStrings() throws Exception { checkRandomData(random(), new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml", - "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 100 * RANDOM_MULTIPLIER); + "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null), 100 * RANDOM_MULTIPLIER); } } diff --git a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java index b33666b0321..8f713b34b1f 100644 --- a/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java +++ b/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java @@ -17,11 +17,13 @@ package org.apache.lucene.analysis.uima.an; * limitations under the License. 
*/ +import org.apache.uima.UimaContext; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.Type; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; /** * Dummy implementation of a UIMA based whitespace tokenizer @@ -30,15 +32,21 @@ public class SampleWSTokenizerAnnotator extends JCasAnnotator_ImplBase { private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation"; private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation"; - private static final String CR = "\n"; + private String lineEnd; private static final String WHITESPACE = " "; + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException { + super.initialize(aContext); + lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end")); + } + @Override public void process(JCas jCas) throws AnalysisEngineProcessException { Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE); Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE); int i = 0; - for (String sentenceString : jCas.getDocumentText().split(CR)) { + for (String sentenceString : jCas.getDocumentText().split(lineEnd)) { // add the sentence AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length()); jCas.addFsToIndexes(sentenceAnnotation); From 80430f5f620e0b849f0c1cd19469457ee60dee63 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Mon, 4 Feb 2013 13:41:06 +0000 Subject: [PATCH 09/18] LUCENE-4749 - added sample conf in solr/contrib/uima too git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442112 13f79535-47bb-0310-9956-ffa450edef68 --- .../uima/src/test-files/uima/uima-tokenizers-schema.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml index 13a9c9f8092..6a1dddbfade 100644 --- a/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml +++ b/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml @@ -300,7 +300,8 @@ + descriptorPath="/uima/AggregateSentenceAE.xml" tokenType="org.apache.uima.SentenceAnnotation" + ngramsize="2"/> From 9705a9d0dea5b52809a68641332e3803e585274a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 5 Feb 2013 08:24:35 +0000 Subject: [PATCH 10/18] LUCENE-4744: Remove FieldCache.StopFillChacheException git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442497 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/search/FieldCache.java | 85 ++++-- .../apache/lucene/search/FieldCacheImpl.java | 275 ++++++++---------- .../org/apache/lucene/util/NumericUtils.java | 39 +++ .../lucene/search/JustCompileSearch.java | 12 + .../org/apache/lucene/search/TestSort.java | 34 +++ 5 files changed, 279 insertions(+), 166 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 815d225f8e1..1a3b26baa79 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -19,7 +19,6 @@ package org.apache.lucene.search; import java.io.IOException; import java.io.PrintStream; -import java.text.DecimalFormat; 
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs import org.apache.lucene.document.IntField; // for javadocs @@ -28,6 +27,7 @@ import org.apache.lucene.document.LongField; // for javadocs import org.apache.lucene.document.DoubleField; // for javadocs import org.apache.lucene.index.DocTermOrds; import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -52,21 +52,22 @@ public interface FieldCache { Object value; } - /** - * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops - * processing terms and returns the current FieldCache - * array. - * @lucene.internal - */ - public static final class StopFillCacheException extends RuntimeException { - } - /** * Marker interface as super-interface to all parsers. It * is used to specify a custom parser to {@link * SortField#SortField(String, FieldCache.Parser)}. */ public interface Parser { + + /** + * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers + * to filter the actual TermsEnum before the field cache is filled. + * + * @param terms the {@link Terms} instance to create the {@link TermsEnum} from. + * @return a possibly filtered {@link TermsEnum} instance, this method must not return null. + * @throws IOException if an {@link IOException} occurs + */ + public TermsEnum termsEnum(Terms terms) throws IOException; } /** Interface to parse bytes from document fields. @@ -134,6 +135,10 @@ public interface FieldCache { public String toString() { return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }; /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ @@ -150,6 +155,11 @@ public interface FieldCache { public String toString() { return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }; /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ @@ -162,6 +172,12 @@ public interface FieldCache { // directly from byte[] return Integer.parseInt(term.utf8ToString()); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + @Override public String toString() { return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; @@ -178,6 +194,12 @@ public interface FieldCache { // directly from byte[] return Float.parseFloat(term.utf8ToString()); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + @Override public String toString() { return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; @@ -194,6 +216,12 @@ public interface FieldCache { // directly from byte[] return Long.parseLong(term.utf8ToString()); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + @Override public String toString() { return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; @@ -210,6 +238,12 @@ public interface FieldCache { // directly from byte[] return Double.parseDouble(term.utf8ToString()); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + @Override public String toString() { return 
FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; @@ -223,10 +257,14 @@ public interface FieldCache { public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){ @Override public int parseInt(BytesRef term) { - if (NumericUtils.getPrefixCodedIntShift(term) > 0) - throw new StopFillCacheException(); return NumericUtils.prefixCodedToInt(term); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedInts(terms.iterator(null)); + } + @Override public String toString() { return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; @@ -240,14 +278,17 @@ public interface FieldCache { public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){ @Override public float parseFloat(BytesRef term) { - if (NumericUtils.getPrefixCodedIntShift(term) > 0) - throw new StopFillCacheException(); return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)); } @Override public String toString() { return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedInts(terms.iterator(null)); + } }; /** @@ -257,14 +298,17 @@ public interface FieldCache { public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){ @Override public long parseLong(BytesRef term) { - if (NumericUtils.getPrefixCodedLongShift(term) > 0) - throw new StopFillCacheException(); return NumericUtils.prefixCodedToLong(term); } @Override public String toString() { return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedLongs(terms.iterator(null)); + } }; /** @@ -274,14 +318,17 @@ public interface FieldCache { public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){ @Override public double parseDouble(BytesRef term) { - if (NumericUtils.getPrefixCodedLongShift(term) > 0) - throw new StopFillCacheException(); return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)); } @Override public String toString() { return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedLongs(terms.iterator(null)); + } }; @@ -652,7 +699,7 @@ public interface FieldCache { } } - + /** * EXPERT: Generates an array of CacheEntry objects representing all items * currently in the FieldCache. diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index df42c0da747..9430e507558 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -140,13 +140,6 @@ class FieldCacheImpl implements FieldCache { public Object getValue() { return value; } } - /** - * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops - * processing terms and returns the current FieldCache - * array. - */ - static final class StopFillCacheException extends RuntimeException { - } // per-segment fieldcaches don't purge until the shared core closes. 
final SegmentReader.CoreClosedListener purgeCore = new SegmentReader.CoreClosedListener() { @@ -360,32 +353,30 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final byte termval = parser.parseByte(term); + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - final byte termval = parser.parseByte(term); - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } if (setDocsWithField) { @@ -435,32 +426,30 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final short termval = parser.parseShort(term); + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - final short termval = parser.parseShort(term); - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } if (setDocsWithField) { @@ -536,37 +525,35 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final int termval = parser.parseInt(term); + if (retArray == null) { + // late init so numeric fields don't double allocate + retArray = new int[maxDoc]; + } + + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; 
} - final int termval = parser.parseInt(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new int[maxDoc]; - } - - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } @@ -689,37 +676,35 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final float termval = parser.parseFloat(term); + if (retArray == null) { + // late init so numeric fields don't double allocate + retArray = new float[maxDoc]; + } + + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - final float termval = parser.parseFloat(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new float[maxDoc]; - } - - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } @@ -779,37 +764,35 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final long termval = parser.parseLong(term); + if (retArray == null) { + // late init so numeric fields don't double allocate + retArray = new long[maxDoc]; + } + + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - final long termval = parser.parseLong(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new long[maxDoc]; - } - - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new 
FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } @@ -870,37 +853,35 @@ class FieldCacheImpl implements FieldCache { setDocsWithField = false; } } - final TermsEnum termsEnum = terms.iterator(null); + final TermsEnum termsEnum = parser.termsEnum(terms); + assert termsEnum != null : "TermsEnum must not be null"; DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final double termval = parser.parseDouble(term); + if (retArray == null) { + // late init so numeric fields don't double allocate + retArray = new double[maxDoc]; + } + + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - final double termval = parser.parseDouble(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new double[maxDoc]; - } - - docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - if (setDocsWithField) { - if (docsWithField == null) { - // Lazy init - docsWithField = new FixedBitSet(maxDoc); - } - docsWithField.set(docID); + retArray[docID] = termval; + if (setDocsWithField) { + if (docsWithField == null) { + // Lazy init + docsWithField = new FixedBitSet(maxDoc); } + docsWithField.set(docID); } } - } catch (FieldCache.StopFillCacheException stop) { } } if (retArray == null) { // no values diff --git a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java index f4fcc632339..34ead1fae72 100644 --- a/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java @@ -22,6 +22,8 @@ import org.apache.lucene.document.DoubleField; // javadocs import org.apache.lucene.document.FloatField; // javadocs import org.apache.lucene.document.IntField; // javadocs import org.apache.lucene.document.LongField; // javadocs +import org.apache.lucene.index.FilteredTermsEnum; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; // for javadocs @@ -456,4 +458,41 @@ public final class NumericUtils { } + /** + * Filters the given {@link TermsEnum} by accepting only prefix coded 64 bit + * terms with a shift value of 0. + * + * @param termsEnum + * the terms enum to filter + * @return a filtered {@link TermsEnum} that only returns prefix coded 64 bit + * terms with a shift value of 0. + */ + public static TermsEnum filterPrefixCodedLongs(TermsEnum termsEnum) { + return new FilteredTermsEnum(termsEnum, false) { + @Override + protected AcceptStatus accept(BytesRef term) { + return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END; + } + }; + } + + /** + * Filters the given {@link TermsEnum} by accepting only prefix coded 32 bit + * terms with a shift value of 0. 
+ * + * @param termsEnum + * the terms enum to filter + * @return a filtered {@link TermsEnum} that only returns prefix coded 32 bit + * terms with a shift value of 0. + */ + public static TermsEnum filterPrefixCodedInts(TermsEnum termsEnum) { + return new FilteredTermsEnum(termsEnum, false) { + + @Override + protected AcceptStatus accept(BytesRef term) { + return NumericUtils.getPrefixCodedIntShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END; + } + }; + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 88ef513bf7e..e2fde126487 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Norm; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -95,6 +97,11 @@ final class JustCompileSearch { public long parseLong(BytesRef string) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public TermsEnum termsEnum(Terms terms) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } @@ -104,6 +111,11 @@ final class JustCompileSearch { public double parseDouble(BytesRef term) { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + + @Override + public TermsEnum termsEnum(Terms terms) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSort.java b/lucene/core/src/test/org/apache/lucene/search/TestSort.java index e143d6a4750..95348834a49 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java @@ -53,6 +53,8 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.Directory; @@ -581,6 +583,11 @@ public class TestSort extends LuceneTestCase { public final int parseInt(final BytesRef term) { return (term.bytes[term.offset]-'A') * 123456; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " IntParser"); @@ -591,6 +598,10 @@ public class TestSort extends LuceneTestCase { public final float parseFloat(final BytesRef term) { return (float) Math.sqrt( term.bytes[term.offset] ); } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " FloatParser"); @@ -601,6 +612,11 @@ public class TestSort extends LuceneTestCase { public final long parseLong(final BytesRef term) { return (term.bytes[term.offset]-'A') * 1234567890L; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return 
terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " LongParser"); @@ -611,6 +627,10 @@ public class TestSort extends LuceneTestCase { public final double parseDouble(final BytesRef term) { return Math.pow( term.bytes[term.offset], (term.bytes[term.offset]-'A') ); } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " DoubleParser"); @@ -621,6 +641,11 @@ public class TestSort extends LuceneTestCase { public final byte parseByte(final BytesRef term) { return (byte) (term.bytes[term.offset]-'A'); } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " ByteParser"); @@ -631,6 +656,10 @@ public class TestSort extends LuceneTestCase { public final short parseShort(final BytesRef term) { return (short) (term.bytes[term.offset]-'A'); } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }), SortField.FIELD_DOC ); assertMatches (full, queryA, sort, "JIHGFEDCBA"); assertSaneFieldCaches(getTestName() + " ShortParser"); @@ -708,6 +737,11 @@ public class TestSort extends LuceneTestCase { public final int parseInt(final BytesRef term) { return (term.bytes[term.offset]-'A') * 123456; } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } }; @Override From ec8e1b9cdd77c18911440610650ff8770dfe2bee Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Tue, 5 Feb 2013 09:15:35 +0000 Subject: [PATCH 11/18] LUCENE-4570: Use the Policeman Forbidden API checker, released separately from Lucene and downloaded via Ivy git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442507 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + lucene/build.xml | 24 +- lucene/common-build.xml | 8 + lucene/licenses/asm-debug-all-4.1.jar.sha1 | 1 - .../asm-debug-all-LICENSE-BSD_LIKE.txt | 29 - lucene/licenses/asm-debug-all-NOTICE.txt | 2 - lucene/tools/build.xml | 1 - lucene/tools/custom-tasks.xml | 1 - lucene/tools/forbiddenApis/commons-io.txt | 35 -- lucene/tools/forbiddenApis/jdk-deprecated.txt | 441 ---------------- lucene/tools/forbiddenApis/jdk.txt | 95 ---- lucene/tools/forbiddenApis/system-out.txt | 22 - lucene/tools/ivy.xml | 8 - lucene/tools/src/java/lucene-solr.antlib.xml | 3 - .../validation/ForbiddenApisCheckTask.java | 498 ------------------ solr/build.xml | 29 +- solr/common-build.xml | 4 + solr/solrj/ivy.xml | 2 +- 18 files changed, 50 insertions(+), 1156 deletions(-) delete mode 100644 lucene/licenses/asm-debug-all-4.1.jar.sha1 delete mode 100644 lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt delete mode 100644 lucene/licenses/asm-debug-all-NOTICE.txt delete mode 100644 lucene/tools/forbiddenApis/commons-io.txt delete mode 100644 lucene/tools/forbiddenApis/jdk-deprecated.txt delete mode 100644 lucene/tools/forbiddenApis/jdk.txt delete mode 100644 lucene/tools/forbiddenApis/system-out.txt delete mode 100644 lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0696fa5b9da..9aa08b03e78 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -137,6 +137,9 @@ 
From ec8e1b9cdd77c18911440610650ff8770dfe2bee Mon Sep 17 00:00:00 2001
From: Uwe Schindler
Date: Tue, 5 Feb 2013 09:15:35 +0000
Subject: [PATCH 11/18] LUCENE-4570: Use the Policeman Forbidden API checker,
 released separately from Lucene and downloaded via Ivy

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442507 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/CHANGES.txt                            |   3 +
 lucene/build.xml                              |  24 +-
 lucene/common-build.xml                       |   8 +
 lucene/licenses/asm-debug-all-4.1.jar.sha1    |   1 -
 .../asm-debug-all-LICENSE-BSD_LIKE.txt        |  29 -
 lucene/licenses/asm-debug-all-NOTICE.txt      |   2 -
 lucene/tools/build.xml                        |   1 -
 lucene/tools/custom-tasks.xml                 |   1 -
 lucene/tools/forbiddenApis/commons-io.txt     |  35 --
 lucene/tools/forbiddenApis/jdk-deprecated.txt | 441 ----------------
 lucene/tools/forbiddenApis/jdk.txt            |  95 ----
 lucene/tools/forbiddenApis/system-out.txt     |  22 -
 lucene/tools/ivy.xml                          |   8 -
 lucene/tools/src/java/lucene-solr.antlib.xml  |   3 -
 .../validation/ForbiddenApisCheckTask.java    | 498 ------------------
 solr/build.xml                                |  29 +-
 solr/common-build.xml                         |   4 +
 solr/solrj/ivy.xml                            |   2 +-
 18 files changed, 50 insertions(+), 1156 deletions(-)
 delete mode 100644 lucene/licenses/asm-debug-all-4.1.jar.sha1
 delete mode 100644 lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt
 delete mode 100644 lucene/licenses/asm-debug-all-NOTICE.txt
 delete mode 100644 lucene/tools/forbiddenApis/commons-io.txt
 delete mode 100644 lucene/tools/forbiddenApis/jdk-deprecated.txt
 delete mode 100644 lucene/tools/forbiddenApis/jdk.txt
 delete mode 100644 lucene/tools/forbiddenApis/system-out.txt
 delete mode 100644 lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0696fa5b9da..9aa08b03e78 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -137,6 +137,9 @@ Build
 
 * LUCENE-4636: Upgrade ivy to 2.3.0 (Shawn Heisey via Robert Muir)
 
+* LUCENE-4570: Use the Policeman Forbidden API checker, released separately
+  from Lucene and downloaded via Ivy. (Uwe Schindler, Robert Muir)
+
 ======================= Lucene 4.1.0 =======================
 
 Changes in backwards compatibility policy
diff --git a/lucene/build.xml b/lucene/build.xml
index 6751ce0f43e..7961b92ac9a 100644
--- a/lucene/build.xml
+++ b/lucene/build.xml
@@ -157,28 +157,34 @@
 - + + + + + + + + + - - - - - - + + + + - + - +
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index 45267c7ac41..8056e5b02f3 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -1902,6 +1902,14 @@
 ${tests-output}/junit4-*.suites - per-JVM executed suites
 + + + + + + +
diff --git a/lucene/licenses/asm-debug-all-4.1.jar.sha1 b/lucene/licenses/asm-debug-all-4.1.jar.sha1
deleted file mode 100644
index 09de7a9691d..00000000000
--- a/lucene/licenses/asm-debug-all-4.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-dd6ba5c392d4102458494e29f54f70ac534ec2a2
diff --git a/lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt b/lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt
deleted file mode 100644
index c5aba7be471..00000000000
--- a/lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright (c) 2000-2011 INRIA, France Telecom
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holders nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lucene/licenses/asm-debug-all-NOTICE.txt b/lucene/licenses/asm-debug-all-NOTICE.txt
deleted file mode 100644
index f6df5a6fe78..00000000000
--- a/lucene/licenses/asm-debug-all-NOTICE.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-ASM - Lightweight Java Bytecode Manipulation Framework
-Copyright © 1999-2012, OW2 Consortium
diff --git a/lucene/tools/build.xml b/lucene/tools/build.xml
index 55f5b4a2cd7..35310bc1f2b 100644
--- a/lucene/tools/build.xml
+++ b/lucene/tools/build.xml
@@ -25,7 +25,6 @@
 - - - - - -
diff --git a/lucene/tools/src/java/lucene-solr.antlib.xml b/lucene/tools/src/java/lucene-solr.antlib.xml
index f18d8a3287b..6ab57c6695a 100644
--- a/lucene/tools/src/java/lucene-solr.antlib.xml
+++ b/lucene/tools/src/java/lucene-solr.antlib.xml
@@ -18,7 +18,4 @@
 -
diff --git a/lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java b/lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java
deleted file mode 100644
index 545e2496195..00000000000
--- a/lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java
+++ /dev/null
@@ -1,498 +0,0 @@
-package org.apache.lucene.validation;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.objectweb.asm.ClassReader;
-import org.objectweb.asm.Label;
-import org.objectweb.asm.ClassVisitor;
-import org.objectweb.asm.FieldVisitor;
-import org.objectweb.asm.MethodVisitor;
-import org.objectweb.asm.Opcodes;
-import org.objectweb.asm.Type;
-import org.objectweb.asm.commons.Method;
-
-import org.apache.tools.ant.AntClassLoader;
-import org.apache.tools.ant.BuildException;
-import org.apache.tools.ant.Project;
-import org.apache.tools.ant.Task;
-import org.apache.tools.ant.types.Path;
-import org.apache.tools.ant.types.FileSet;
-import org.apache.tools.ant.types.Reference;
-import org.apache.tools.ant.types.Resource;
-import org.apache.tools.ant.types.ResourceCollection;
-import org.apache.tools.ant.types.resources.FileResource;
-import org.apache.tools.ant.types.resources.Resources;
-import org.apache.tools.ant.types.resources.FileResource;
-import org.apache.tools.ant.types.resources.StringResource;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.BufferedReader;
-import java.io.Reader;
-import java.io.File;
-import java.io.StringReader;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Formatter;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.Map;
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * Task to check if a set of class files contains calls to forbidden APIs
- * from a given classpath and list of API signatures (either inline or as pointer to files).
- * In contrast to other ANT tasks, this tool does only visit the given classpath - * and the system classloader. It uses the local classpath in preference to the system classpath - * (which violates the spec). - */ -public class ForbiddenApisCheckTask extends Task { - - private final Resources classFiles = new Resources(); - private final Resources apiSignatures = new Resources(); - private Path classpath = null; - - private boolean failOnUnsupportedJava = false; - - ClassLoader loader = null; - - final Map classesToCheck = new HashMap(); - final Map classpathClassCache = new HashMap(); - - final Map forbiddenFields = new HashMap(); - final Map forbiddenMethods = new HashMap(); - final Map forbiddenClasses = new HashMap(); - - /** Reads a class (binary name) from the given {@link ClassLoader}. */ - ClassSignatureLookup getClassFromClassLoader(final String clazz) throws BuildException { - ClassSignatureLookup c = classpathClassCache.get(clazz); - if (c == null) { - try { - final InputStream in = loader.getResourceAsStream(clazz.replace('.', '/') + ".class"); - if (in == null) { - throw new BuildException("Loading of class " + clazz + " failed: Not found"); - } - try { - classpathClassCache.put(clazz, c = new ClassSignatureLookup(new ClassReader(in))); - } finally { - in.close(); - } - } catch (IOException ioe) { - throw new BuildException("Loading of class " + clazz + " failed.", ioe); - } - } - return c; - } - - /** Adds the method signature to the list of disallowed methods. The Signature is checked against the given ClassLoader. */ - private void addSignature(final String signature) throws BuildException { - final String clazz, field; - final Method method; - int p = signature.indexOf('#'); - if (p >= 0) { - clazz = signature.substring(0, p); - final String s = signature.substring(p + 1); - p = s.indexOf('('); - if (p >= 0) { - if (p == 0) { - throw new BuildException("Invalid method signature (method name missing): " + signature); - } - // we ignore the return type, its just to match easier (so return type is void): - try { - method = Method.getMethod("void " + s, true); - } catch (IllegalArgumentException iae) { - throw new BuildException("Invalid method signature: " + signature); - } - field = null; - } else { - field = s; - method = null; - } - } else { - clazz = signature; - method = null; - field = null; - } - // check class & method/field signature, if it is really existent (in classpath), but we don't really load the class into JVM: - final ClassSignatureLookup c = getClassFromClassLoader(clazz); - if (method != null) { - assert field == null; - // list all methods with this signature: - boolean found = false; - for (final Method m : c.methods) { - if (m.getName().equals(method.getName()) && Arrays.equals(m.getArgumentTypes(), method.getArgumentTypes())) { - found = true; - forbiddenMethods.put(c.reader.getClassName() + '\000' + m, signature); - // don't break when found, as there may be more covariant overrides! - } - } - if (!found) { - throw new BuildException("No method found with following signature: " + signature); - } - } else if (field != null) { - assert method == null; - if (!c.fields.contains(field)) { - throw new BuildException("No field found with following name: " + signature); - } - forbiddenFields.put(c.reader.getClassName() + '\000' + field, signature); - } else { - assert field == null && method == null; - // only add the signature as class name - forbiddenClasses.put(c.reader.getClassName(), signature); - } - } - - /** Reads a list of API signatures. 
Closes the Reader when done (on Exception, too)! */ - private void parseApiFile(Reader reader) throws IOException { - final BufferedReader r = new BufferedReader(reader); - try { - String line; - while ((line = r.readLine()) != null) { - line = line.trim(); - if (line.length() == 0 || line.startsWith("#")) - continue; - addSignature(line); - } - } finally { - r.close(); - } - } - - /** Parses a class given as (FileSet) Resource */ - private ClassReader loadClassFromResource(final Resource res) throws BuildException { - try { - final InputStream stream = res.getInputStream(); - try { - return new ClassReader(stream); - } finally { - stream.close(); - } - } catch (IOException ioe) { - throw new BuildException("IO problem while reading class file " + res, ioe); - } - } - - /** Parses a class given as Resource and checks for valid method invocations */ - private int checkClass(final ClassReader reader) { - final int[] violations = new int[1]; - reader.accept(new ClassVisitor(Opcodes.ASM4) { - final String className = Type.getObjectType(reader.getClassName()).getClassName(); - String source = null; - - @Override - public void visitSource(String source, String debug) { - this.source = source; - } - - @Override - public MethodVisitor visitMethod(int access, String name, String desc, String signature, String[] exceptions) { - return new MethodVisitor(Opcodes.ASM4) { - private int lineNo = -1; - - private ClassSignatureLookup lookupRelatedClass(String internalName) { - ClassSignatureLookup c = classesToCheck.get(internalName); - if (c == null) try { - c = getClassFromClassLoader(internalName); - } catch (BuildException be) { - // we ignore lookup errors and simply ignore this related class - c = null; - } - return c; - } - - private boolean checkClassUse(String owner) { - final String printout = forbiddenClasses.get(owner); - if (printout != null) { - log("Forbidden class use: " + printout, Project.MSG_ERR); - return true; - } - return false; - } - - private boolean checkMethodAccess(String owner, Method method) { - if (checkClassUse(owner)) { - return true; - } - final String printout = forbiddenMethods.get(owner + '\000' + method); - if (printout != null) { - log("Forbidden method invocation: " + printout, Project.MSG_ERR); - return true; - } - final ClassSignatureLookup c = lookupRelatedClass(owner); - if (c != null && !c.methods.contains(method)) { - final String superName = c.reader.getSuperName(); - if (superName != null && checkMethodAccess(superName, method)) { - return true; - } - final String[] interfaces = c.reader.getInterfaces(); - if (interfaces != null) { - for (String intf : interfaces) { - if (intf != null && checkMethodAccess(intf, method)) { - return true; - } - } - } - } - return false; - } - - private boolean checkFieldAccess(String owner, String field) { - if (checkClassUse(owner)) { - return true; - } - final String printout = forbiddenFields.get(owner + '\000' + field); - if (printout != null) { - log("Forbidden field access: " + printout, Project.MSG_ERR); - return true; - } - final ClassSignatureLookup c = lookupRelatedClass(owner); - if (c != null && !c.fields.contains(field)) { - final String superName = c.reader.getSuperName(); - if (superName != null && checkFieldAccess(superName, field)) { - return true; - } - final String[] interfaces = c.reader.getInterfaces(); - if (interfaces != null) { - for (String intf : interfaces) { - if (intf != null && checkFieldAccess(intf, field)) { - return true; - } - } - } - } - return false; - } - - @Override - public void 
visitMethodInsn(int opcode, String owner, String name, String desc) { - if (checkMethodAccess(owner, new Method(name, desc))) { - violations[0]++; - reportSourceAndLine(); - } - } - - @Override - public void visitFieldInsn(int opcode, String owner, String name, String desc) { - if (checkFieldAccess(owner, name)) { - violations[0]++; - reportSourceAndLine(); - } - } - - private void reportSourceAndLine() { - final StringBuilder sb = new StringBuilder(" in ").append(className); - if (source != null && lineNo >= 0) { - new Formatter(sb, Locale.ROOT).format(" (%s:%d)", source, lineNo).flush(); - } - log(sb.toString(), Project.MSG_ERR); - } - - @Override - public void visitLineNumber(int lineNo, Label start) { - this.lineNo = lineNo; - } - }; - } - }, ClassReader.SKIP_FRAMES); - return violations[0]; - } - - @Override - public void execute() throws BuildException { - AntClassLoader antLoader = null; - try { - if (classpath != null) { - classpath.setProject(getProject()); - this.loader = antLoader = getProject().createClassLoader(ClassLoader.getSystemClassLoader(), classpath); - // force that loading from this class loader is done first, then parent is asked. - // This violates spec, but prevents classes in any system classpath to be used if a local one is available: - antLoader.setParentFirst(false); - } else { - this.loader = ClassLoader.getSystemClassLoader(); - } - classFiles.setProject(getProject()); - apiSignatures.setProject(getProject()); - - final long start = System.currentTimeMillis(); - - // check if we can load runtime classes (e.g. java.lang.String). - // If this fails, we have a newer Java version than ASM supports: - try { - getClassFromClassLoader(String.class.getName()); - } catch (IllegalArgumentException iae) { - final String msg = String.format(Locale.ROOT, - "Your Java version (%s) is not supported by <%s/>. 
Please run the checks with a supported JDK!", - System.getProperty("java.version"), getTaskName()); - if (failOnUnsupportedJava) { - throw new BuildException(msg); - } else { - log("WARNING: " + msg, Project.MSG_WARN); - return; - } - } - - try { - @SuppressWarnings("unchecked") - Iterator iter = (Iterator) apiSignatures.iterator(); - if (!iter.hasNext()) { - throw new BuildException("You need to supply at least one API signature definition through apiFile=, , or inner text."); - } - while (iter.hasNext()) { - final Resource r = iter.next(); - if (!r.isExists()) { - throw new BuildException("Resource does not exist: " + r); - } - if (r instanceof StringResource) { - final String s = ((StringResource) r).getValue(); - if (s != null && s.trim().length() > 0) { - log("Reading inline API signatures...", Project.MSG_INFO); - parseApiFile(new StringReader(s)); - } - } else { - log("Reading API signatures: " + r, Project.MSG_INFO); - parseApiFile(new InputStreamReader(r.getInputStream(), "UTF-8")); - } - } - } catch (IOException ioe) { - throw new BuildException("IO problem while reading files with API signatures.", ioe); - } - if (forbiddenMethods.isEmpty() && forbiddenClasses.isEmpty()) { - throw new BuildException("No API signatures found; use apiFile=, , or inner text to define those!"); - } - - log("Loading classes to check...", Project.MSG_INFO); - - @SuppressWarnings("unchecked") - Iterator iter = (Iterator) classFiles.iterator(); - if (!iter.hasNext()) { - throw new BuildException("There is no given or the fileset does not contain any class files to check."); - } - while (iter.hasNext()) { - final Resource r = iter.next(); - if (!r.isExists()) { - throw new BuildException("Class file does not exist: " + r); - } - - ClassReader reader = loadClassFromResource(r); - classesToCheck.put(reader.getClassName(), new ClassSignatureLookup(reader)); - } - - log("Scanning for API signatures and dependencies...", Project.MSG_INFO); - - int errors = 0; - for (final ClassSignatureLookup c : classesToCheck.values()) { - errors += checkClass(c.reader); - } - - log(String.format(Locale.ROOT, - "Scanned %d (and %d related) class file(s) for forbidden API invocations (in %.2fs), %d error(s).", - classesToCheck.size(), classpathClassCache.size(), (System.currentTimeMillis() - start) / 1000.0, errors), - errors > 0 ? 
Project.MSG_ERR : Project.MSG_INFO); - - if (errors > 0) { - throw new BuildException("Check for forbidden API calls failed, see log."); - } - } finally { - this.loader = null; - if (antLoader != null) antLoader.cleanup(); - antLoader = null; - classesToCheck.clear(); - classpathClassCache.clear(); - forbiddenFields.clear(); - forbiddenMethods.clear(); - forbiddenClasses.clear(); - } - } - - /** Set of class files to check */ - public void add(ResourceCollection rc) { - classFiles.add(rc); - } - - /** A file with API signatures apiFile= attribute */ - public void setApiFile(File file) { - apiSignatures.add(new FileResource(getProject(), file)); - } - - /** Set of files with API signatures as nested element */ - public FileSet createApiFileSet() { - final FileSet fs = new FileSet(); - fs.setProject(getProject()); - apiSignatures.add(fs); - return fs; - } - - /** Support for API signatures list as nested text */ - public void addText(String text) { - apiSignatures.add(new StringResource(getProject(), text)); - } - - /** Classpath as classpath= attribute */ - public void setClasspath(Path classpath) { - createClasspath().append(classpath); - } - - /** Classpath as classpathRef= attribute */ - public void setClasspathRef(Reference r) { - createClasspath().setRefid(r); - } - - /** Classpath as nested element */ - public Path createClasspath() { - if (this.classpath == null) { - this.classpath = new Path(getProject()); - } - return this.classpath.createPath(); - } - - public void setFailOnUnsupportedJava(boolean failOnUnsupportedJava) { - this.failOnUnsupportedJava = failOnUnsupportedJava; - } - - static final class ClassSignatureLookup { - public final ClassReader reader; - public final Set methods; - public final Set fields; - - public ClassSignatureLookup(final ClassReader reader) { - this.reader = reader; - final Set methods = new HashSet(); - final Set fields = new HashSet(); - reader.accept(new ClassVisitor(Opcodes.ASM4) { - @Override - public MethodVisitor visitMethod(int access, String name, String desc, String signature, String[] exceptions) { - final Method m = new Method(name, desc); - methods.add(m); - return null; - } - - @Override - public FieldVisitor visitField(int access, String name, String desc, String signature, Object value) { - fields.add(name); - return null; - } - }, ClassReader.SKIP_CODE | ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES); - this.methods = Collections.unmodifiableSet(methods); - this.fields = Collections.unmodifiableSet(fields); - } - } - -} diff --git a/solr/build.xml b/solr/build.xml index 9c0cfdf6fb1..963a6c9c667 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -236,19 +236,29 @@ - + + + + + + + + + + + - - - - - - + + + + + - + @@ -258,8 +268,7 @@ - - + diff --git a/solr/common-build.xml b/solr/common-build.xml index f56e96bb1b7..3a90428a730 100644 --- a/solr/common-build.xml +++ b/solr/common-build.xml @@ -68,6 +68,10 @@ where X.Y.M is the last version released (on this branch). --> + + +
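For the gist of the machinery this patch removes (it now ships as the separately released Policeman forbidden-apis checker): the deleted task parses signature lines of the form class, class#field, or class#method(argTypes) — for example java.lang.Thread#stop() — and then scans each class file with an ASM visitor, flagging matching visitMethodInsn and visitFieldInsn calls. A stripped-down, hypothetical sketch of that core idea follows; the class name, file handling, and the single hard-coded signature are invented for illustration:

  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;

  import org.objectweb.asm.ClassReader;
  import org.objectweb.asm.ClassVisitor;
  import org.objectweb.asm.MethodVisitor;
  import org.objectweb.asm.Opcodes;

  /** Hypothetical standalone demo, not part of the patch. */
  public class ForbiddenCallSketch {
    public static void main(String[] args) throws IOException {
      // args[0]: path to a compiled .class file to scan
      InputStream in = new FileInputStream(args[0]);
      try {
        new ClassReader(in).accept(new ClassVisitor(Opcodes.ASM4) {
          @Override
          public MethodVisitor visitMethod(int access, final String name, String desc,
                                           String signature, String[] exceptions) {
            return new MethodVisitor(Opcodes.ASM4) {
              @Override
              public void visitMethodInsn(int opcode, String owner, String mName, String mDesc) {
                // report invocations of the deprecated Thread#stop(), a classic forbidden API
                if ("java/lang/Thread".equals(owner) && "stop".equals(mName) && "()V".equals(mDesc)) {
                  System.out.println("forbidden call to Thread#stop() in method " + name);
                }
              }
            };
          }
        }, ClassReader.SKIP_FRAMES);
      } finally {
        in.close();
      }
    }
  }

The real task does considerably more, as the deleted code above shows: it resolves every signature against the classpath up front, walks superclasses and interfaces so inherited members still match, and reports the offending source file and line via visitSource/visitLineNumber.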