From 32a0f402d66a1da33498a12d98ab366579f40f1c Mon Sep 17 00:00:00 2001
From: "Chris M. Hostetter"
Date: Mon, 23 Jul 2012 17:33:24 +0000
Subject: [PATCH 1/6] SOLR-3623: Fixed inconsistent treatment of third-party
dependencies for solr contribs analysis-extras & uima
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364728 13f79535-47bb-0310-9956-ffa450edef68
---
solr/CHANGES.txt | 2 +
solr/common-build.xml | 53 ++++++++--------
solr/contrib/analysis-extras/README.txt | 7 ++-
solr/contrib/analysis-extras/build.xml | 19 +++---
solr/contrib/analysis-extras/ivy.xml | 3 +
.../lib/morfologik-fsa-1.5.3.jar.sha1 | 1 +
.../lib/morfologik-fsa-LICENSE-BSD.txt | 29 +++++++++
.../lib/morfologik-fsa-NOTICE.txt | 2 +
.../lib/morfologik-polish-1.5.3.jar.sha1 | 1 +
.../lib/morfologik-polish-LICENSE-BSD.txt | 62 +++++++++++++++++++
.../lib/morfologik-polish-NOTICE.txt | 6 ++
.../lib/morfologik-stemming-1.5.3.jar.sha1 | 1 +
.../lib/morfologik-stemming-LICENSE-BSD.txt | 29 +++++++++
.../lib/morfologik-stemming-NOTICE.txt | 2 +
solr/contrib/uima/README.txt | 1 +
15 files changed, 182 insertions(+), 36 deletions(-)
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-fsa-1.5.3.jar.sha1
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-fsa-LICENSE-BSD.txt
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-fsa-NOTICE.txt
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-polish-1.5.3.jar.sha1
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-polish-LICENSE-BSD.txt
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-polish-NOTICE.txt
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-stemming-1.5.3.jar.sha1
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-stemming-LICENSE-BSD.txt
create mode 100644 solr/contrib/analysis-extras/lib/morfologik-stemming-NOTICE.txt
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2508771e4df..9900f4a37c6 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -131,6 +131,8 @@ Bug Fixes
* SOLR-3663: There are a couple of bugs in the sync process when a leader goes down and a
new leader is elected. (Mark Miller)
+* SOLR-3623: Fixed inconsistent treatment of third-party dependencies for
+ solr contribs analysis-extras & uima (hossman)
Other Changes
----------------------
diff --git a/solr/common-build.xml b/solr/common-build.xml
index e97a2aaa8a6..20170152fac 100644
--- a/solr/common-build.xml
+++ b/solr/common-build.xml
@@ -70,21 +70,32 @@
-->
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -125,7 +136,7 @@
@@ -137,19 +148,11 @@
+
+
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/solr/contrib/analysis-extras/README.txt b/solr/contrib/analysis-extras/README.txt
index c9329438fe7..c7cfbc37f60 100644
--- a/solr/contrib/analysis-extras/README.txt
+++ b/solr/contrib/analysis-extras/README.txt
@@ -9,8 +9,11 @@ Relies upon the following lucene components (in lucene-libs/):
* lucene-analyzers-icu-X.Y.jar
* lucene-analyzers-smartcn-X.Y.jar
* lucene-analyzers-stempel-X.Y.jar
-
-And the ICU library (in lib/):
+ * lucene-analyzers-morfologik-X.Y.jar
+ * lucene-analyzers-smartcn-X.Y.jar
+
+And the following third-party library (in lib/):
* icu4j-X.Y.jar
+ * morfologik-*.jar
diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml
index 93519b05d54..398b7201ea6 100644
--- a/solr/contrib/analysis-extras/build.xml
+++ b/solr/contrib/analysis-extras/build.xml
@@ -24,13 +24,17 @@
+
+
-
-
-
-
-
+
+
@@ -38,10 +42,7 @@
depends="jar-analyzers-icu, jar-analyzers-smartcn, jar-analyzers-stempel, jar-analyzers-morfologik">
-
-
-
-
+
diff --git a/solr/contrib/analysis-extras/ivy.xml b/solr/contrib/analysis-extras/ivy.xml
index 62fcffbab96..6329c419bb1 100644
--- a/solr/contrib/analysis-extras/ivy.xml
+++ b/solr/contrib/analysis-extras/ivy.xml
@@ -20,6 +20,9 @@
+
+
+
diff --git a/solr/contrib/analysis-extras/lib/morfologik-fsa-1.5.3.jar.sha1 b/solr/contrib/analysis-extras/lib/morfologik-fsa-1.5.3.jar.sha1
new file mode 100644
index 00000000000..3d3b86d5f8c
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-fsa-1.5.3.jar.sha1
@@ -0,0 +1 @@
+d1f729cd3019e6d86485226202f84458141a5688
diff --git a/solr/contrib/analysis-extras/lib/morfologik-fsa-LICENSE-BSD.txt b/solr/contrib/analysis-extras/lib/morfologik-fsa-LICENSE-BSD.txt
new file mode 100644
index 00000000000..f97fb7dfe38
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-fsa-LICENSE-BSD.txt
@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of Morfologik nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/solr/contrib/analysis-extras/lib/morfologik-fsa-NOTICE.txt b/solr/contrib/analysis-extras/lib/morfologik-fsa-NOTICE.txt
new file mode 100644
index 00000000000..18ba2f3e39c
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-fsa-NOTICE.txt
@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).
diff --git a/solr/contrib/analysis-extras/lib/morfologik-polish-1.5.3.jar.sha1 b/solr/contrib/analysis-extras/lib/morfologik-polish-1.5.3.jar.sha1
new file mode 100644
index 00000000000..6eb48a47896
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-polish-1.5.3.jar.sha1
@@ -0,0 +1 @@
+8217b6f7ad018ceda0e824b2e60340000da4397a
diff --git a/solr/contrib/analysis-extras/lib/morfologik-polish-LICENSE-BSD.txt b/solr/contrib/analysis-extras/lib/morfologik-polish-LICENSE-BSD.txt
new file mode 100644
index 00000000000..04ffd07ece9
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-polish-LICENSE-BSD.txt
@@ -0,0 +1,62 @@
+BSD-licensed dictionary of Polish (Morfologik)
+
+Copyright (c) 2012, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--
+
+BSD-licensed dictionary of Polish (SGJP)
+http://sgjp.pl/morfeusz/
+
+Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
+ Marcin Woliński, Robert Wołosz
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/solr/contrib/analysis-extras/lib/morfologik-polish-NOTICE.txt b/solr/contrib/analysis-extras/lib/morfologik-polish-NOTICE.txt
new file mode 100644
index 00000000000..a8a3aa11a3d
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-polish-NOTICE.txt
@@ -0,0 +1,6 @@
+
+This product includes data from BSD-licensed dictionary of Polish (Morfologik)
+(http://morfologik.blogspot.com/)
+
+This product includes data from BSD-licensed dictionary of Polish (SGJP)
+(http://sgjp.pl/morfeusz/)
diff --git a/solr/contrib/analysis-extras/lib/morfologik-stemming-1.5.3.jar.sha1 b/solr/contrib/analysis-extras/lib/morfologik-stemming-1.5.3.jar.sha1
new file mode 100644
index 00000000000..c31642be45d
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-stemming-1.5.3.jar.sha1
@@ -0,0 +1 @@
+c4ead57b78fa71b00553ff21da6fb5a326e914e8
diff --git a/solr/contrib/analysis-extras/lib/morfologik-stemming-LICENSE-BSD.txt b/solr/contrib/analysis-extras/lib/morfologik-stemming-LICENSE-BSD.txt
new file mode 100644
index 00000000000..f97fb7dfe38
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-stemming-LICENSE-BSD.txt
@@ -0,0 +1,29 @@
+
+Copyright (c) 2006 Dawid Weiss
+Copyright (c) 2007-2012 Dawid Weiss, Marcin Miłkowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of Morfologik nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/solr/contrib/analysis-extras/lib/morfologik-stemming-NOTICE.txt b/solr/contrib/analysis-extras/lib/morfologik-stemming-NOTICE.txt
new file mode 100644
index 00000000000..18ba2f3e39c
--- /dev/null
+++ b/solr/contrib/analysis-extras/lib/morfologik-stemming-NOTICE.txt
@@ -0,0 +1,2 @@
+This product includes BSD-licensed software developed by Dawid Weiss and Marcin Miłkowski
+(http://morfologik.blogspot.com/).
diff --git a/solr/contrib/uima/README.txt b/solr/contrib/uima/README.txt
index 9d45910666e..70d49f8ff37 100644
--- a/solr/contrib/uima/README.txt
+++ b/solr/contrib/uima/README.txt
@@ -6,6 +6,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
or set tags in solrconfig.xml appropriately to point those jar files.
+
2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options:
From c0f8cd69a8a8e267305c3d3383bed5616fde4b01 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Mon, 23 Jul 2012 19:26:00 +0000
Subject: [PATCH 2/6] LUCENE-4248: add producer assertions to Codec API / fix
producer inconsistencies
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364763 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/codecs/BlockTreeTermsWriter.java | 2 +-
.../lucene/codecs/PostingsConsumer.java | 2 +-
.../apache/lucene/codecs/TermsConsumer.java | 2 +-
.../index/FreqProxTermsWriterPerField.java | 4 +-
.../org/apache/lucene/index/TestCodecs.java | 4 +-
.../lucene/index/TestPostingsFormat.java | 4 +-
.../asserting/AssertingPostingsFormat.java | 88 ++++++++++++++++++-
7 files changed, 95 insertions(+), 11 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
index 80156f869a7..8dc99b3d27d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@@ -896,7 +896,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// w.close();
// }
} else {
- assert sumTotalTermFreq == 0;
+ assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
assert docCount == 0;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
index f9db84a90ff..e31f4b79343 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
@@ -146,6 +146,6 @@ public abstract class PostingsConsumer {
df++;
}
}
- return new TermStats(df, totTF);
+ return new TermStats(df, indexOptions == IndexOptions.DOCS_ONLY ? -1 : totTF);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
index 30419c95919..4148430f408 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
@@ -205,6 +205,6 @@ public abstract class TermsConsumer {
}
}
}
- finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+ finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
index 425f158afce..6a5f1f119bf 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -542,11 +542,11 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
postingsConsumer.finishDoc();
}
- termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+ termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF;
sumDocFreq += numDocs;
}
- termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
+ termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 7f50fa8ed08..8be1027cd0a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -116,7 +116,7 @@ public class TestCodecs extends LuceneTestCase {
sumDF += term.docs.length;
sumTotalTermCount += term.write(termsConsumer);
}
- termsConsumer.finish(sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
+ termsConsumer.finish(omitTF ? -1 : sumTotalTermCount, sumDF, (int) visitedDocs.cardinality());
}
}
@@ -168,7 +168,7 @@ public class TestCodecs extends LuceneTestCase {
postingsConsumer.finishDoc();
}
}
- termsConsumer.finishTerm(text, new TermStats(docs.length, totTF));
+ termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : totTF));
return totTF;
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java
index c5bdba8ee29..7c55e092df8 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java
@@ -428,12 +428,12 @@ public class TestPostingsFormat extends LuceneTestCase {
postingsConsumer.finishDoc();
docCount++;
}
- termsConsumer.finishTerm(term, new TermStats(postings.size(), totalTF));
+ termsConsumer.finishTerm(term, new TermStats(postings.size(), doFreq ? totalTF : -1));
sumTotalTF += totalTF;
sumDF += postings.size();
}
- termsConsumer.finish(sumTotalTF, sumDF, seenDocs.cardinality());
+ termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
}
fieldsConsumer.close();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
index fb33e38deb2..863de992625 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
@@ -18,16 +18,23 @@ package org.apache.lucene.codecs.asserting;
*/
import java.io.IOException;
+import java.util.Comparator;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
/**
* Just like {@link Lucene40PostingsFormat} but with additional asserts.
@@ -39,10 +46,9 @@ public class AssertingPostingsFormat extends PostingsFormat {
super("Asserting");
}
- // TODO: we could add some useful checks here?
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- return in.fieldsConsumer(state);
+ return new AssertingFieldsConsumer(in.fieldsConsumer(state));
}
@Override
@@ -85,4 +91,82 @@ public class AssertingPostingsFormat extends PostingsFormat {
return in.getUniqueTermCount();
}
}
+
+ static class AssertingFieldsConsumer extends FieldsConsumer {
+ private final FieldsConsumer in;
+
+ AssertingFieldsConsumer(FieldsConsumer in) {
+ this.in = in;
+ }
+
+ @Override
+ public TermsConsumer addField(FieldInfo field) throws IOException {
+ TermsConsumer consumer = in.addField(field);
+ assert consumer != null;
+ return new AssertingTermsConsumer(consumer, field);
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+ }
+
+ static enum TermsConsumerState { INITIAL, START, FINISHED };
+ static class AssertingTermsConsumer extends TermsConsumer {
+ private final TermsConsumer in;
+ private final FieldInfo fieldInfo;
+ private BytesRef lastTerm = null;
+ private TermsConsumerState state = TermsConsumerState.INITIAL;
+
+ AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
+ this.in = in;
+ this.fieldInfo = fieldInfo;
+ }
+
+ // TODO: AssertingPostingsConsumer
+ @Override
+ public PostingsConsumer startTerm(BytesRef text) throws IOException {
+ // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
+ // TODO: this makes the api really confusing! we should try to clean this up!
+ assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START;
+ state = TermsConsumerState.START;
+ assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
+ lastTerm = BytesRef.deepCopyOf(text);
+ return in.startTerm(text);
+ }
+
+ @Override
+ public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+ assert state == TermsConsumerState.START;
+ state = TermsConsumerState.INITIAL;
+ assert text.equals(lastTerm);
+ assert stats.docFreq > 0; // otherwise, this method should not be called.
+ if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+ assert stats.totalTermFreq == -1;
+ }
+ in.finishTerm(text, stats);
+ }
+
+ @Override
+ public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
+ // TODO: assert that if state == START (no finishTerm called), that no actual docs were fed.
+ // TODO: this makes the api really confusing! we should try to clean this up!
+ assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START;
+ state = TermsConsumerState.FINISHED;
+ assert docCount >= 0;
+ assert sumDocFreq >= docCount;
+ if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
+ assert sumTotalTermFreq == -1;
+ } else {
+ assert sumTotalTermFreq >= sumDocFreq;
+ }
+ in.finish(sumTotalTermFreq, sumDocFreq, docCount);
+ }
+
+ @Override
+ public Comparator getComparator() throws IOException {
+ return in.getComparator();
+ }
+ }
}
From 618c38fd98b582579e82759315cf477c1956ea82 Mon Sep 17 00:00:00 2001
From: David Wayne Smiley
Date: Mon, 23 Jul 2012 20:19:02 +0000
Subject: [PATCH 3/6] LUCENE-4223 spatial docs: overview.html, SpatialStrategy,
and added SpatialExample.java sample
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364782 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/spatial/SpatialStrategy.java | 22 ++-
lucene/spatial/src/java/overview.html | 43 ++++-
.../apache/lucene/spatial/SpatialExample.java | 180 ++++++++++++++++++
3 files changed, 241 insertions(+), 4 deletions(-)
create mode 100644 lucene/spatial/src/test/org/apache/lucene/spatial/SpatialExample.java
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java
index a04a5805861..81eadf8083e 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/SpatialStrategy.java
@@ -28,10 +28,26 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.query.SpatialArgs;
/**
- * The SpatialStrategy encapsulates an approach to indexing and searching based on shapes.
+ * The SpatialStrategy encapsulates an approach to indexing and searching based
+ * on shapes.
*
- * Note that a SpatialStrategy is not involved with the Lucene stored field values of shapes, which is
- * immaterial to indexing & search.
+ * Different implementations will support different features. A strategy should
+ * document these common elements:
+ *
+ * - Can it index more than one shape per field?
+ * - What types of shapes can be indexed?
+ * - What types of query shapes can be used?
+ * - What types of query operations are supported?
+ * This might vary per shape.
+ * - Are there caches? Under what circumstances are they used?
+ * Roughly how big are they? Is it segmented by Lucene segments, such as is
+ * done by the Lucene {@link org.apache.lucene.search.FieldCache} and
+ * {@link org.apache.lucene.index.DocValues} (ideal) or is it for the entire
+ * index?
+ *
+ *
+ * Note that a SpatialStrategy is not involved with the Lucene stored field
+ * values of shapes, which is immaterial to indexing & search.
*
* Thread-safe.
*
diff --git a/lucene/spatial/src/java/overview.html b/lucene/spatial/src/java/overview.html
index 9ba5f0b63fe..51fe0031fec 100644
--- a/lucene/spatial/src/java/overview.html
+++ b/lucene/spatial/src/java/overview.html
@@ -16,8 +16,49 @@
-->
- Apache Lucene Spatial Strategies
+ Apache Lucene Spatial Module
+
+ The Spatial Module for Apache Lucene
+
+
+ The spatial module is new in Lucene 4, replacing the old contrib module
+ that came before it. The principal interface to the module is
+ a {@link org.apache.lucene.spatial.SpatialStrategy}
+ which encapsulates an approach to indexing and searching
+ based on shapes. Different Strategies have different features and
+ performance profiles, which are documented at each Strategy class level.
+
+
+ For some sample code showing how to use the API, see SpatialExample.java in
+ the tests.
+
+
+ The spatial module uses
+ Spatial4j
+ heavily. Spatial4j is an ASL licensed library with these capabilities:
+
+ - Provides shape implementations, namely point, rectangle,
+ and circle. Both geospatial contexts and plain 2D Euclidean/Cartesian contexts
+ are supported.
+ With an additional dependency, it adds polygon and other geometry shape
+ support via integration with
+ JTS Topology Suite.
+ This includes dateline wrap support.
+ - Shape parsing and serialization, including
+ Well-Known Text (WKT)
+ (via JTS).
+ - Distance and other spatial related math calculations.
+
+
+
+ Historical note: The new spatial module was once known as
+ Lucene Spatial Playground (LSP) as an external project. In ~March 2012, LSP
+ split into this new module as part of Lucene and Spatial4j externally. A
+ large chunk of the LSP implementation originated as SOLR-2155 which uses
+ trie/prefix-tree algorithms with a geohash encoding.
+
+
\ No newline at end of file
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialExample.java b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialExample.java
new file mode 100644
index 00000000000..a3963c02bf8
--- /dev/null
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialExample.java
@@ -0,0 +1,180 @@
+package org.apache.lucene.spatial;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.simple.SimpleSpatialContext;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
+import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialArgsParser;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
+
+import java.io.IOException;
+
+/**
+ * This class serves as example code to show how to use the Lucene spatial
+ * module.
+ */
+public class SpatialExample extends RandomizedTest {
+
+ public static void main(String[] args) throws IOException {
+ new SpatialExample().test();
+ }
+
+ public void test() throws IOException {
+ init();
+ indexPoints();
+ search();
+ }
+
+ /**
+ * The Spatial4j {@link SpatialContext} is a sort of global-ish singleton
+ * needed by Lucene spatial. It's a facade to the rest of Spatial4j, acting
+ * as a factory for {@link Shape}s and provides access to reading and writing
+ * them from Strings.
+ */
+ private SpatialContext ctx;//"ctx" is the conventional variable name
+
+ /**
+ * The Lucene spatial {@link SpatialStrategy} encapsulates an approach to
+ * indexing and searching shapes, and providing relevancy scores for them.
+ * It's a simple API to unify different approaches.
+ *
+ * Note that these are initialized with a field name.
+ */
+ private SpatialStrategy strategy;
+
+ private Directory directory;
+
+ protected void init() {
+ //Typical geospatial context with kilometer units.
+ // These can also be constructed from a factory: SpatialContextFactory
+ this.ctx = SimpleSpatialContext.GEO_KM;
+
+ int maxLevels = 10;//results in sub-meter precision for geohash
+ //TODO demo lookup by detail distance
+ // This can also be constructed from a factory: SpatialPrefixTreeFactory
+ SpatialPrefixTree grid = new GeohashPrefixTree(ctx, maxLevels);
+
+ this.strategy = new RecursivePrefixTreeStrategy(grid, "myGeoField");
+
+ this.directory = new RAMDirectory();
+ }
+
+ private void indexPoints() throws IOException {
+ IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_50,null);
+ IndexWriter indexWriter = new IndexWriter(directory, iwConfig);
+
+ //Spatial4j is x-y order for arguments
+ indexWriter.addDocument(newSampleDocument(
+ 2, ctx.makePoint(-80.93, 33.77)));
+
+ //When parsing a string to a shape, the presence of a comma means it's y-x
+ // order (lon, lat)
+ indexWriter.addDocument(newSampleDocument(
+ 4, ctx.readShape("-50.7693246, 60.9289094")));
+
+ indexWriter.addDocument(newSampleDocument(
+ 20, ctx.makePoint(0.1,0.1), ctx.makePoint(0, 0)));
+
+ indexWriter.close();
+ }
+
+ private Document newSampleDocument(int id, Shape... shapes) {
+ Document doc = new Document();
+ doc.add(new IntField("id", id, Field.Store.YES));
+ //Potentially more than one shape in this field is supported by some
+ // strategies; see the javadocs of the SpatialStrategy impl to see.
+ for (Shape shape : shapes) {
+ for (IndexableField f : strategy.createIndexableFields(shape)) {
+ doc.add(f);
+ }
+ //store it too; the format is up to you
+ doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
+ }
+
+ return doc;
+ }
+
+ private void search() throws IOException {
+ IndexReader indexReader = DirectoryReader.open(directory);
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+ Sort idSort = new Sort(new SortField("id", SortField.Type.INT));
+
+ //--Filter by circle (<= distance from a point)
+ {
+ //Search with circle
+ //note: SpatialArgs can be parsed from a string
+ SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+ ctx.makeCircle(-80.0, 33.0, 200));//200km (since km == ctx.getDistanceUnits)
+ Filter filter = strategy.makeFilter(args);
+ TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), filter, 10, idSort);
+ assertDocMatchedIds(indexSearcher, docs, 2);
+ }
+ //--Match all, order by distance
+ {
+ SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,//doesn't matter
+ ctx.makePoint(60, -50));
+ ValueSource valueSource = strategy.makeValueSource(args);//the distance
+ Sort reverseDistSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);//true=asc dist
+ TopDocs docs = indexSearcher.search(new MatchAllDocsQuery(), 10, reverseDistSort);
+ assertDocMatchedIds(indexSearcher, docs, 4, 20, 2);
+ }
+ //demo arg parsing
+ {
+ SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
+ ctx.makeCircle(-80.0, 33.0, 200));
+ SpatialArgs args2 = new SpatialArgsParser().parse("Intersects(Circle(33,-80 d=200))", ctx);
+ assertEquals(args.toString(),args2.toString());
+ }
+
+ indexReader.close();
+ }
+
+ private void assertDocMatchedIds(IndexSearcher indexSearcher, TopDocs docs, int... ids) throws IOException {
+ int[] gotIds = new int[docs.totalHits];
+ for (int i = 0; i < gotIds.length; i++) {
+ gotIds[i] = indexSearcher.doc(docs.scoreDocs[i].doc).getField("id").numericValue().intValue();
+ }
+ assertArrayEquals(ids,gotIds);
+ }
+
+}
From 87182914a334d928b966cfcd8a2983c1973e01ac Mon Sep 17 00:00:00 2001
From: Mark Robert Miller
Date: Mon, 23 Jul 2012 20:28:13 +0000
Subject: [PATCH 4/6] since we raised the session timeout, this needs to be
willing to poll longer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364786 13f79535-47bb-0310-9956-ffa450edef68
---
.../org/apache/solr/cloud/LeaderElectionIntegrationTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
index 887c8f2e6ad..d55ed7ec9f0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionIntegrationTest.java
@@ -191,7 +191,7 @@ public class LeaderElectionIntegrationTest extends SolrTestCaseJ4 {
int newLeaderPort = getLeaderPort(leader);
int retry = 0;
while (leaderPort == newLeaderPort) {
- if (retry++ == 20) {
+ if (retry++ == 60) {
break;
}
Thread.sleep(1000);
From e0d137f8e2ad41039a8486601e1ff0463f25840e Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Mon, 23 Jul 2012 20:46:08 +0000
Subject: [PATCH 5/6] LUCENE-4828: add AssertingPostingsConsumer, fix minor
inconsistencies in producers
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364792 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/codecs/PostingsConsumer.java | 9 +-
.../apache/lucene/codecs/TermsConsumer.java | 8 +-
.../index/FreqProxTermsWriterPerField.java | 6 +-
.../org/apache/lucene/index/TestCodecs.java | 4 +-
.../lucene/index/TestPostingsFormat.java | 2 +-
.../asserting/AssertingPostingsFormat.java | 101 ++++++++++++++++--
6 files changed, 110 insertions(+), 20 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
index e31f4b79343..099a1f26622 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
@@ -49,14 +49,17 @@ import org.apache.lucene.util.FixedBitSet;
*/
public abstract class PostingsConsumer {
- /** Adds a new doc in this term. */
+ /** Adds a new doc in this term.
+ * <code>freq</code> will be -1 when term frequencies are omitted
+ * for the field. */
public abstract void startDoc(int docID, int freq) throws IOException;
/** Add a new position & payload, and start/end offset. A
* null payload means no payload; a non-null payload with
* zero length also means no payload. Caller may reuse
* the {@link BytesRef} for the payload between calls
- * (method must fully consume the payload). */
+ * (method must fully consume the payload). <code>startOffset</code>
+ * and <code>endOffset</code> will be -1 when offsets are not indexed. */
public abstract void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException;
/** Called when we are done adding positions & payloads
@@ -78,7 +81,7 @@ public abstract class PostingsConsumer {
break;
}
visitedDocs.set(doc);
- this.startDoc(doc, 0);
+ this.startDoc(doc, -1);
this.finishDoc();
df++;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
index 4148430f408..85dc6132b49 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
@@ -57,10 +57,14 @@ public abstract class TermsConsumer {
* no docs. */
public abstract PostingsConsumer startTerm(BytesRef text) throws IOException;
- /** Finishes the current term; numDocs must be > 0. */
+ /** Finishes the current term; numDocs must be &gt; 0.
+ * <code>stats.totalTermFreq</code> will be -1 when term
+ * frequencies are omitted for the field. */
public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
- /** Called when we are done adding terms to this field */
+ /** Called when we are done adding terms to this field.
+ * <code>sumTotalTermFreq</code> will be -1 when term
+ * frequencies are omitted for the field. */
public abstract void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException;
/** Return the BytesRef Comparator used to sort terms
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
index 6a5f1f119bf..658ea591791 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -430,7 +430,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
if (readTermFreq) {
termDocFreq = postings.docFreqs[termID];
} else {
- termDocFreq = 0;
+ termDocFreq = -1;
}
postings.lastDocCodes[termID] = -1;
} else {
@@ -441,7 +441,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final int code = freq.readVInt();
if (!readTermFreq) {
docID += code;
- termDocFreq = 0;
+ termDocFreq = -1;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
// 2nd sweep does the real flush, but I suspect
// that'd add too much time to flush.
visitedDocs.set(docID);
- postingsConsumer.startDoc(docID, termDocFreq);
+ postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
if (docID < delDocLimit) {
// Mark it deleted. TODO: we could also skip
// writing its postings; this would be
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index 8be1027cd0a..fdb9307731c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -154,7 +154,7 @@ public class TestCodecs extends LuceneTestCase {
for(int i=0;i