From d8e0288109f05ffc828e7f41a0f00b2c6f707151 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 16 May 2012 08:01:40 +0000 Subject: [PATCH 01/47] LUCENE-4060: port to trunk git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339047 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 + .../example/merge/TaxonomyMergeUtils.java | 2 +- .../facet/taxonomy/directory/Consts.java | 3 +- .../directory/DirectoryTaxonomyWriter.java | 208 ++++---------- .../taxonomy/directory/TestAddTaxonomies.java | 254 ------------------ .../taxonomy/directory/TestAddTaxonomy.java | 228 ++++++++++++++++ 6 files changed, 280 insertions(+), 420 deletions(-) delete mode 100644 lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomies.java create mode 100644 lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 17ea33945f8..5d86c224963 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -953,6 +953,11 @@ Bug fixes offset calculation in PathHierarchyTokenizer. (Mike McCandless, Uwe Schindler, Robert Muir) +* LUCENE-4060: Fix a synchronization bug in + DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to + addTaxonomy and now takes only one Directory and one OrdinalMap. + (Shai Erera, Gilad Barkai) + Documentation * LUCENE-3958: Javadocs corrections for IndexWriter. diff --git a/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java b/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java index 40dfac53dfd..835c8e846f5 100644 --- a/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java +++ b/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java @@ -81,7 +81,7 @@ public class TaxonomyMergeUtils { OrdinalMap map, IndexWriter destIndexWriter, DirectoryTaxonomyWriter destTaxWriter) throws IOException { // merge the taxonomies - destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map }); + destTaxWriter.addTaxonomy(srcTaxDir, map); PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider( srcIndexDir, map.getMap(), new DefaultFacetIndexingParams()); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java index ec6e842c067..2d7988ad152 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java @@ -4,8 +4,6 @@ import java.io.IOException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.index.StoredFieldVisitor.Status; -import org.apache.lucene.store.IndexInput; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -42,6 +40,7 @@ abstract class Consts { public static final class LoadFullPathOnly extends StoredFieldVisitor { private String fullPath; + @Override public void stringField(FieldInfo fieldInfo, String value) throws IOException { fullPath = value; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index e1dfbea534d..b77e0f1bcfe 100644 --- 
a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -12,15 +12,21 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; -import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache; +import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache; +import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocsEnum; @@ -30,9 +36,9 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.AlreadyClosedException; @@ -44,13 +50,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.TaxonomyWriter; -import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache; -import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache; -import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -812,6 +811,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { } return parentArray; } + @Override public int getParent(int ordinal) throws IOException { ensureOpen(); @@ -823,158 +823,47 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { } return getParentArray().getArray()[ordinal]; } - + /** - * Take all the categories of one or more given taxonomies, and add them to - * the main taxonomy (this), if they are not already there. - *

- * Additionally, fill a mapping for each of the added taxonomies, - * mapping its ordinals to the ordinals in the enlarged main taxonomy. - * These mapping are saved into an array of OrdinalMap objects given by the - * user, one for each of the given taxonomies (not including "this", the main - * taxonomy). Often the first of these will be a MemoryOrdinalMap and the - * others will be a DiskOrdinalMap - see discussion in {OrdinalMap}. - *

- * Note that the taxonomies to be added are given as Directory objects, - * not opened TaxonomyReader/TaxonomyWriter objects, so if any of them are - * currently managed by an open TaxonomyWriter, make sure to commit() (or - * close()) it first. The main taxonomy (this) is an open TaxonomyWriter, - * and does not need to be commit()ed before this call. + * Takes the categories from the given taxonomy directory, and adds the + * missing ones to this taxonomy. Additionally, it fills the given + * {@link OrdinalMap} with a mapping from the original ordinal to the new + * ordinal. */ - public void addTaxonomies(Directory[] taxonomies, OrdinalMap[] ordinalMaps) throws IOException { + public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException { ensureOpen(); - // To prevent us stepping on the rest of this class's decisions on when - // to open a reader, and when not, we'll be opening a new reader instead - // of using the existing "reader" object: - IndexReader mainreader = openReader(); - // TODO (Facet): can this then go segment-by-segment and avoid MultiDocsEnum etc? - Terms terms = MultiFields.getTerms(mainreader, Consts.FULL); - assert terms != null; // TODO (Facet): explicit check / throw exception? - TermsEnum mainte = terms.iterator(null); - DocsEnum mainde = null; - - IndexReader[] otherreaders = new IndexReader[taxonomies.length]; - TermsEnum[] othertes = new TermsEnum[taxonomies.length]; - DocsEnum[] otherdocsEnum = new DocsEnum[taxonomies.length]; // just for reuse - for (int i=0; i0) { - // TODO: use a pq here - String first=null; - for (int i=0; i0) { - first = currentOthers[i]; - } - } - int comp = 0; - if (currentMain==null || (comp = currentMain.compareTo(first))>0) { - // If 'first' is before currentMain, or currentMain is null, - // then 'first' is a new category and we need to add it to the - // main taxonomy. Then for all taxonomies with this 'first' - // category, we need to add the new category number to their - // map, and move to the next category in all of them. + IndexReader r = DirectoryReader.open(taxoDir); + try { + final int size = r.numDocs(); + final OrdinalMap ordinalMap = map; + ordinalMap.setSize(size); + CategoryPath cp = new CategoryPath(); + Terms terms = MultiFields.getTerms(r, Consts.FULL); + TermsEnum te = terms.iterator(null); + Bits liveDocs = MultiFields.getLiveDocs(r); + DocsEnum docs = null; + // we call next() first, to skip the root category which always exists. + while (te.next() != null) { + String value = te.term().utf8ToString(); cp.clear(); - cp.add(first, delimiter); - // We can call internalAddCategory() instead of addCategory() - // because we know the category hasn't been seen yet. - int newordinal = internalAddCategory(cp, cp.length()); - // TODO (Facet): we already had this term in our hands before, in nextTE... - // // TODO (Facet): no need to make this term? 
- for (int i=0; i 0 */ { - // The currentMain doesn't appear in any of the other taxonomies - - // we don't need to do anything, just continue to the next one - currentMain = nextTE(mainte); + docs = te.docs(liveDocs, docs, false); + ordinalMap.addMapping(docs.nextDoc(), ordinal); } - } - - // Close all the readers we've opened, and also tell the ordinal maps - // we're done adding to them - mainreader.close(); - for (int i=0; i1) { - for (int i=0; i= copytr.getSize()); - } else { - assertEquals(copytr.getSize(), tr.getSize()); - } - for (int j=0; j copytr.getSize()) { - String prev = tr.getPath(copytr.getSize()).toString(); - for (int j=copytr.getSize()+1; j 0); + assertEquals(destOrdinal, map[j]); + } + } finally { + srcTR.close(); + } + } finally { + destTR.close(); + } + } + + public void testAddEmpty() throws Exception { + Directory dest = newDirectory(); + DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest); + destTW.addCategory(new CategoryPath("Author", "Rob Pike")); + destTW.addCategory(new CategoryPath("Aardvarks", "Bob")); + destTW.commit(); + + Directory src = newDirectory(); + new DirectoryTaxonomyWriter(src).close(); // create an empty taxonomy + + OrdinalMap map = randomOrdinalMap(); + destTW.addTaxonomy(src, map); + destTW.close(); + + validate(dest, src, map); + + IOUtils.close(dest, src); + } + + public void testAddToEmpty() throws Exception { + Directory dest = newDirectory(); + + Directory src = newDirectory(); + DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src); + srcTW.addCategory(new CategoryPath("Author", "Rob Pike")); + srcTW.addCategory(new CategoryPath("Aardvarks", "Bob")); + srcTW.close(); + + DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest); + OrdinalMap map = randomOrdinalMap(); + destTW.addTaxonomy(src, map); + destTW.close(); + + validate(dest, src, map); + + IOUtils.close(dest, src); + } + + // A more comprehensive and big random test. 
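/*
 * A minimal sketch, with assumed glue code, of the addTaxonomy + OrdinalMap
 * cycle these tests drive: MemoryOrdinalMap is the in-memory OrdinalMap
 * implementation used in testConcurrency below, and getMap() is the
 * source-ordinal to destination-ordinal array consumed in TaxonomyMergeUtils
 * earlier in this patch. Illustrative only:
 *
 *   Directory src = newDirectory();
 *   DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src);
 *   srcTW.addCategory(new CategoryPath("Author", "Mark Twain"));
 *   srcTW.close();
 *
 *   Directory dest = newDirectory();
 *   DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
 *   OrdinalMap map = new MemoryOrdinalMap();
 *   destTW.addTaxonomy(src, map);   // adds only the categories missing from dest
 *   destTW.close();
 *
 *   int[] ordinals = map.getMap();  // ordinals[srcOrdinal] == destOrdinal
 */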
+ @Nightly + public void testBig() throws Exception { + dotest(200, 10000); + dotest(1000, 20000); + // really big + dotest(400000, 1000000); + } + + // a reasonable random test + public void testMedium() throws Exception { + Random random = random(); + int numTests = atLeast(3); + for (int i = 0; i < numTests; i++) { + dotest(_TestUtil.nextInt(random, 2, 100), + _TestUtil.nextInt(random, 100, 1000)); + } + } + + public void testSimple() throws Exception { + Directory dest = newDirectory(); + DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dest); + tw1.addCategory(new CategoryPath("Author", "Mark Twain")); + tw1.addCategory(new CategoryPath("Animals", "Dog")); + tw1.addCategory(new CategoryPath("Author", "Rob Pike")); + + Directory src = newDirectory(); + DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(src); + tw2.addCategory(new CategoryPath("Author", "Rob Pike")); + tw2.addCategory(new CategoryPath("Aardvarks", "Bob")); + tw2.close(); + + OrdinalMap map = randomOrdinalMap(); + + tw1.addTaxonomy(src, map); + tw1.close(); + + validate(dest, src, map); + + IOUtils.close(dest, src); + } + + public void testConcurrency() throws Exception { + // tests that addTaxonomy and addCategory work in parallel + final int numCategories = atLeast(5000); + + // build an input taxonomy index + Directory src = newDirectory(); + DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(src); + for (int i = 0; i < numCategories; i++) { + tw.addCategory(new CategoryPath("a", Integer.toString(i))); + } + tw.close(); + + // now add the taxonomy to an empty taxonomy, while adding the categories + // again, in parallel -- in the end, no duplicate categories should exist. + Directory dest = newDirectory(); + final DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest); + Thread t = new Thread() { + @Override + public void run() { + for (int i = 0; i < numCategories; i++) { + try { + destTW.addCategory(new CategoryPath("a", Integer.toString(i))); + } catch (IOException e) { + // shouldn't happen - if it does, let the test fail on uncaught exception. + throw new RuntimeException(e); + } + } + } + }; + t.start(); + + OrdinalMap map = new MemoryOrdinalMap(); + destTW.addTaxonomy(src, map); + t.join(); + destTW.close(); + + // now validate + + DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dest); + // +2 to account for the root category + "a" + assertEquals(numCategories + 2, dtr.getSize()); + HashSet categories = new HashSet(); + for (int i = 1; i < dtr.getSize(); i++) { + CategoryPath cat = dtr.getPath(i); + assertTrue("category " + cat + " already existed", categories.add(cat)); + } + dtr.close(); + + IOUtils.close(src, dest); + } + +} From 10de0207955c8c8edaac127c65213abdc70a7535 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Wed, 16 May 2012 09:55:44 +0000 Subject: [PATCH 02/47] LUCENE-4059: Improve ANT task prepare-webpages (used by documentation tasks) to correctly encode build file names as URIs for later processing by XSL git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339097 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ++++ lucene/build.xml | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5d86c224963..c3127c169aa 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -995,6 +995,10 @@ Build * LUCENE-3286: Moved remainder of contrib/xml-query-parser to lucene/queryparser. 
Classes now found at org.apache.lucene.queryparser.xml.* +* LUCENE-4059: Improve ANT task prepare-webpages (used by documentation + tasks) to correctly encode build file names as URIs for later processing by + XSL. (Greg Bowyer, Uwe Schindler) + ======================= Lucene 3.6.0 ======================= Changes in backwards compatibility policy diff --git a/lucene/build.xml b/lucene/build.xml index 7e16b01354d..91d3d3ce84c 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -225,8 +225,13 @@ - + + + + self.addMappedName((new java.io.File(source)).toURI()); + + - - - - + + + + + diff --git a/solr/example/multicore/core0/conf/solrconfig.xml b/solr/example/multicore/core0/conf/solrconfig.xml index 81c52bbe88e..18d842b718a 100644 --- a/solr/example/multicore/core0/conf/solrconfig.xml +++ b/solr/example/multicore/core0/conf/solrconfig.xml @@ -29,7 +29,22 @@ ${solr.core0.data.dir:} - + + + ${solr.core0.data.dir:} + + + + + + + true + + + + diff --git a/solr/example/multicore/core1/conf/schema.xml b/solr/example/multicore/core1/conf/schema.xml index d02865df407..5a27d39df2d 100644 --- a/solr/example/multicore/core1/conf/schema.xml +++ b/solr/example/multicore/core1/conf/schema.xml @@ -19,14 +19,16 @@ + - - - - + + + + + diff --git a/solr/example/multicore/core1/conf/solrconfig.xml b/solr/example/multicore/core1/conf/solrconfig.xml index 73e816f00e6..7bc5a3ac6dd 100644 --- a/solr/example/multicore/core1/conf/solrconfig.xml +++ b/solr/example/multicore/core1/conf/solrconfig.xml @@ -29,7 +29,22 @@ ${solr.core1.data.dir:} - + + + ${solr.core1.data.dir:} + + + + + + + true + + + + From 8db469ef018fd7131a3dea6f94eb3c0ae57454d8 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 17 May 2012 17:59:42 +0000 Subject: [PATCH 22/47] Fix typo in defaultValue parameter name git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339750 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/solr/response/transform/ValueAugmenterFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java index a62e709f2d1..f0eeafbc4f7 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ValueAugmenterFactory.java @@ -37,7 +37,7 @@ public class ValueAugmenterFactory extends TransformerFactory public void init(NamedList args) { value = args.get( "value" ); if( value == null ) { - defaultValue = args.get( "deaultValue" ); + defaultValue = args.get( "defaultValue" ); } } From fc84112ece8e7306083283c7d3b099e97bdbcbfb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 17 May 2012 18:03:13 +0000 Subject: [PATCH 23/47] add hepburn test and fix some corner cases git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339753 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/analysis/ja/util/ToStringUtil.java | 69 ++++++++++++++-- .../analysis/ja/util/TestToStringUtil.java | 78 +++++++++++++++++++ 2 files changed, 142 insertions(+), 5 deletions(-) diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java index c83de194d7a..977ab49f0e8 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java +++ 
b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java @@ -252,6 +252,8 @@ public class ToStringUtil { /** * Romanize katakana with modified hepburn */ + // TODO: now that this is used by readingsfilter and not just for + // debugging, fix this to really be a scheme that works best with IMEs public static void getRomanization(Appendable builder, CharSequence s) throws IOException { final int len = s.length(); for (int i = 0; i < len; i++) { @@ -522,6 +524,9 @@ public class ToStringUtil { if (ch2 == 'ウ') { builder.append("tō"); i++; + } else if (ch2 == 'ゥ') { + builder.append("tu"); + i++; } else { builder.append("to"); } @@ -665,7 +670,7 @@ public class ToStringUtil { builder.append("mu"); break; case 'メ': - builder.append("mi"); + builder.append("me"); break; case 'モ': if (ch2 == 'ウ') { @@ -690,7 +695,12 @@ public class ToStringUtil { } break; case 'ラ': - builder.append("ra"); + if (ch2 == '゜') { + builder.append("la"); + i++; + } else { + builder.append("ra"); + } break; case 'リ': if (ch2 == 'ョ' && ch3 == 'ウ') { @@ -711,20 +721,36 @@ public class ToStringUtil { } else if (ch2 == 'ェ') { builder.append("rye"); i++; + } else if (ch2 == '゜') { + builder.append("li"); + i++; } else { builder.append("ri"); } break; case 'ル': - builder.append("ru"); + if (ch2 == '゜') { + builder.append("lu"); + i++; + } else { + builder.append("ru"); + } break; case 'レ': - builder.append("re"); + if (ch2 == '゜') { + builder.append("le"); + i++; + } else { + builder.append("re"); + } break; case 'ロ': if (ch2 == 'ウ') { builder.append("rō"); i++; + } else if (ch2 == '゜') { + builder.append("lo"); + i++; } else { builder.append("ro"); } @@ -887,7 +913,28 @@ public class ToStringUtil { builder.append("da"); break; case 'ヂ': - builder.append("ji"); + // TODO: investigate all this + if (ch2 == 'ョ' && ch3 == 'ウ') { + builder.append("jō"); + i += 2; + } else if (ch2 == 'ュ' && ch3 == 'ウ') { + builder.append("jū"); + i += 2; + } else if (ch2 == 'ャ') { + builder.append("ja"); + i++; + } else if (ch2 == 'ョ') { + builder.append("jo"); + i++; + } else if (ch2 == 'ュ') { + builder.append("ju"); + i++; + } else if (ch2 == 'ェ') { + builder.append("je"); + i++; + } else { + builder.append("ji"); + } break; case 'ヅ': builder.append("zu"); @@ -994,6 +1041,18 @@ public class ToStringUtil { builder.append("po"); } break; + case 'ヷ': + builder.append("va"); + break; + case 'ヸ': + builder.append("vi"); + break; + case 'ヹ': + builder.append("ve"); + break; + case 'ヺ': + builder.append("vo"); + break; case 'ヴ': if (ch2 == 'ィ' && ch3 == 'ェ') { builder.append("vye"); diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java index f95a527dcb9..a2388d7c03c 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java @@ -17,6 +17,9 @@ package org.apache.lucene.analysis.ja.util; * limitations under the License. 
*/ +import java.util.HashMap; +import java.util.Map; + import org.apache.lucene.util.LuceneTestCase; public class TestToStringUtil extends LuceneTestCase { @@ -31,4 +34,79 @@ public class TestToStringUtil extends LuceneTestCase { assertEquals("chashu", ToStringUtil.getRomanization("チャーシュー")); assertEquals("shumai", ToStringUtil.getRomanization("シューマイ")); } + + // see http://en.wikipedia.org/wiki/Hepburn_romanization, + // but this isnt even thorough or really probably what we want! + public void testHepburnTable() { + Map table = new HashMap() {{ + put("ア", "a"); put("イ", "i"); put("ウ", "u"); put("エ", "e"); put("オ", "o"); + put("カ", "ka"); put("キ", "ki"); put("ク", "ku"); put("ケ", "ke"); put("コ", "ko"); + put("サ", "sa"); put("シ", "shi"); put("ス", "su"); put("セ", "se"); put("ソ", "so"); + put("タ", "ta"); put("チ", "chi"); put("ツ", "tsu"); put("テ", "te"); put("ト", "to"); + put("ナ", "na"); put("ニ", "ni"); put("ヌ", "nu"); put("ネ", "ne"); put("ノ", "no"); + put("ハ", "ha"); put("ヒ", "hi"); put("フ", "fu"); put("ヘ", "he"); put("ホ", "ho"); + put("マ", "ma"); put("ミ", "mi"); put("ム", "mu"); put("メ", "me"); put("モ", "mo"); + put("ヤ", "ya"); put("ユ", "yu"); put("ヨ", "yo"); + put("ラ", "ra"); put("リ", "ri"); put("ル", "ru"); put("レ", "re"); put("ロ", "ro"); + put("ワ", "wa"); put("ヰ", "i"); put("ヱ", "e"); put("ヲ", "o"); + put("ン", "n"); + put("ガ", "ga"); put("ギ", "gi"); put("グ", "gu"); put("ゲ", "ge"); put("ゴ", "go"); + put("ザ", "za"); put("ジ", "ji"); put("ズ", "zu"); put("ゼ", "ze"); put("ゾ", "zo"); + put("ダ", "da"); put("ヂ", "ji"); put("ヅ", "zu"); put("デ", "de"); put("ド", "do"); + put("バ", "ba"); put("ビ", "bi"); put("ブ", "bu"); put("ベ", "be"); put("ボ", "bo"); + put("パ", "pa"); put("ピ", "pi"); put("プ", "pu"); put("ペ", "pe"); put("ポ", "po"); + + put("キャ", "kya"); put("キュ", "kyu"); put("キョ", "kyo"); + put("シャ", "sha"); put("シュ", "shu"); put("ショ", "sho"); + put("チャ", "cha"); put("チュ", "chu"); put("チョ", "cho"); + put("ニャ", "nya"); put("ニュ", "nyu"); put("ニョ", "nyo"); + put("ヒャ", "hya"); put("ヒュ", "hyu"); put("ヒョ", "hyo"); + put("ミャ", "mya"); put("ミュ", "myu"); put("ミョ", "myo"); + put("リャ", "rya"); put("リュ", "ryu"); put("リョ", "ryo"); + put("ギャ", "gya"); put("ギュ", "gyu"); put("ギョ", "gyo"); + put("ジャ", "ja"); put("ジュ", "ju"); put("ジョ", "jo"); + put("ヂャ", "ja"); put("ヂュ", "ju"); put("ヂョ", "jo"); + put("ビャ", "bya"); put("ビュ", "byu"); put("ビョ", "byo"); + put("ピャ", "pya"); put("ピュ", "pyu"); put("ピョ", "pyo"); + + put("イィ", "yi"); put("イェ", "ye"); + put("ウァ", "wa"); put("ウィ", "wi"); put("ウゥ", "wu"); put("ウェ", "we"); put("ウォ", "wo"); + put("ウュ", "wyu"); + // TODO: really should be vu + put("ヴァ", "va"); put("ヴィ", "vi"); put("ヴ", "v"); put("ヴェ", "ve"); put("ヴォ", "vo"); + put("ヴャ", "vya"); put("ヴュ", "vyu"); put("ヴィェ", "vye"); put("ヴョ", "vyo"); + put("キェ", "kye"); + put("ギェ", "gye"); + put("クァ", "kwa"); put("クィ", "kwi"); put("クェ", "kwe"); put("クォ", "kwo"); + put("クヮ", "kwa"); + put("グァ", "gwa"); put("グィ", "gwi"); put("グェ", "gwe"); put("グォ", "gwo"); + put("グヮ", "gwa"); + put("シェ", "she"); + put("ジェ", "je"); + put("スィ", "si"); + put("ズィ", "zi"); + put("チェ", "che"); + put("ツァ", "tsa"); put("ツィ", "tsi"); put("ツェ", "tse"); put("ツォ", "tso"); + put("ツュ", "tsyu"); + put("ティ", "ti"); put("トゥ", "tu"); + put("テュ", "tyu"); + put("ディ", "di"); put("ドゥ", "du"); + put("デュ", "dyu"); + put("ニェ", "nye"); + put("ヒェ", "hye"); + put("ビェ", "bye"); + put("ピェ", "pye"); + put("ファ", "fa"); put("フィ", "fi"); put("フェ", "fe"); put("フォ", "fo"); + put("フャ", "fya"); put("フュ", "fyu"); put("フィェ", "fye"); put("フョ", "fyo"); + put("ホゥ", "hu"); + put("ミェ", "mye"); + 
put("リェ", "rye"); + put("ラ゜", "la"); put("リ゜", "li"); put("ル゜", "lu"); put("レ゜", "le"); put("ロ゜", "lo"); + put("ヷ", "va"); put("ヸ", "vi"); put("ヹ", "ve"); put("ヺ", "vo"); + }}; + + for (String s : table.keySet()) { + assertEquals(s, table.get(s), ToStringUtil.getRomanization(s)); + } + } } From 28b1e9e4edcdb5600f8368847d7a228445d6a94f Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 17 May 2012 18:33:15 +0000 Subject: [PATCH 24/47] LUCENE-4066: compare actual # terms in the queue instead of unique terms git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339779 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java b/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java index ba43d4d842e..a2903cf2265 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java @@ -68,7 +68,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase { assertAllBetween(last2, j, bd2, ids); last2 = j + 1; } - assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes()); + assertEquals(j+1, queue.numGlobalTermDeletes()); } assertEquals(uniqueValues, bd1.terms.keySet()); assertEquals(uniqueValues, bd2.terms.keySet()); From a137d089c8fee227d2a350037c8a3b0296fb6f70 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 17 May 2012 18:34:04 +0000 Subject: [PATCH 25/47] Update clustering /browse UI to reflect long past change with Carrot dependencies that used to require a system property switch to enable git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339781 13f79535-47bb-0310-9956-ffa450edef68 --- solr/example/solr/conf/solrconfig.xml | 8 +------- solr/example/solr/conf/velocity/cluster.vm | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml index 787b078c119..5f66ae8bc60 100755 --- a/solr/example/solr/conf/solrconfig.xml +++ b/solr/example/solr/conf/solrconfig.xml @@ -1261,13 +1261,8 @@ http://wiki.apache.org/solr/ClusteringComponent - You'll need to set the solr.cluster.enabled system property - when running solr to run with clustering enabled: - - java -Dsolr.clustering.enabled=true -jar start.jar --> - @@ -1333,7 +1328,6 @@ --> true diff --git a/solr/example/solr/conf/velocity/cluster.vm b/solr/example/solr/conf/velocity/cluster.vm index ab2f0e458eb..c798e6a8d12 100644 --- a/solr/example/solr/conf/velocity/cluster.vm +++ b/solr/example/solr/conf/velocity/cluster.vm @@ -1,6 +1,6 @@

Clusters

- Run Solr with java -Dsolr.clustering.enabled=true -jar start.jar to see results + Loading clusters...
+ diff --git a/solr/example/solr/conf/velocity/header.vm b/solr/example/solr/conf/velocity/header.vm index 2eb80782fb0..46f0bab7d43 100644 --- a/solr/example/solr/conf/velocity/header.vm +++ b/solr/example/solr/conf/velocity/header.vm @@ -1,3 +1,3 @@ \ No newline at end of file From 6248459183c97eb4c49b0082a7ea6bf1875fc176 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 18 May 2012 07:53:33 +0000 Subject: [PATCH 29/47] LUCENE-4054: nested suite classes (required for tests) should not run in stand-alone mode. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340021 13f79535-47bb-0310-9956-ffa450edef68 --- .../junitcompat/TestReproduceMessage.java | 12 +--- .../util/junitcompat/WithNestedTests.java | 36 ++++------ .../apache/lucene/util/LuceneTestCase.java | 3 +- .../lucene/util/TestRuleIgnoreTestSuites.java | 67 +++++++++++++++++++ 4 files changed, 85 insertions(+), 33 deletions(-) create mode 100644 lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreTestSuites.java diff --git a/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestReproduceMessage.java b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestReproduceMessage.java index a7e79ae2684..7df711f9670 100644 --- a/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestReproduceMessage.java +++ b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestReproduceMessage.java @@ -53,9 +53,7 @@ public class TestReproduceMessage extends WithNestedTests { public Statement apply(final Statement base, Description description) { return new Statement() { public void evaluate() throws Throwable { - if (isRunningNested()) { - triggerOn(SorePoint.RULE); - } + triggerOn(SorePoint.RULE); base.evaluate(); } }; @@ -69,9 +67,7 @@ public class TestReproduceMessage extends WithNestedTests { @Before public void before() { - if (isRunningNested()) { - triggerOn(SorePoint.BEFORE); - } + triggerOn(SorePoint.BEFORE); } @Test @@ -81,9 +77,7 @@ public class TestReproduceMessage extends WithNestedTests { @After public void after() { - if (isRunningNested()) { - triggerOn(SorePoint.AFTER); - } + triggerOn(SorePoint.AFTER); } @AfterClass diff --git a/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java b/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java index af844ee9f82..adf0f94f735 100644 --- a/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java +++ b/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java @@ -22,15 +22,18 @@ import java.io.PrintStream; import java.io.UnsupportedEncodingException; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestRuleIgnoreTestSuites; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.ClassRule; +import org.junit.Rule; import org.junit.rules.TestRule; import org.junit.runner.Description; import org.junit.runners.model.Statement; import com.carrotsearch.randomizedtesting.RandomizedRunner; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; /** * An abstract test class that prepares nested test classes to run. @@ -45,28 +48,11 @@ import com.carrotsearch.randomizedtesting.RandomizedRunner; * cause havoc (static fields). */ public abstract class WithNestedTests { - /** - * This can no longer be thread local because {@link RandomizedRunner} runs - * suites in an isolated threadgroup/thread. 
- */ - public static volatile boolean runsAsNested; - - public static abstract class AbstractNestedTest extends LuceneTestCase { - @ClassRule - public static TestRule ignoreIfRunAsStandalone = new TestRule() { - public Statement apply(final Statement s, Description arg1) { - return new Statement() { - public void evaluate() throws Throwable { - if (isRunningNested()) { - s.evaluate(); - } - } - }; - } - }; + public static abstract class AbstractNestedTest extends LuceneTestCase + implements TestRuleIgnoreTestSuites.NestedTestSuite { protected static boolean isRunningNested() { - return runsAsNested; + return TestRuleIgnoreTestSuites.isRunningNested(); } } @@ -81,6 +67,12 @@ public abstract class WithNestedTests { private ByteArrayOutputStream sysout; private ByteArrayOutputStream syserr; + /** + * Restore properties after test. + */ + @Rule + public SystemPropertiesRestoreRule restoreProperties = new SystemPropertiesRestoreRule(); + @Before public final void before() { if (suppressOutputStreams) { @@ -97,13 +89,11 @@ public abstract class WithNestedTests { } } - runsAsNested = true; + System.setProperty(TestRuleIgnoreTestSuites.PROPERTY_RUN_NESTED, "true"); } @After public final void after() { - runsAsNested = false; - if (suppressOutputStreams) { System.out.flush(); System.err.flush(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 943bd6820c8..7dbb7386d44 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -291,7 +291,8 @@ public abstract class LuceneTestCase extends Assert { */ @ClassRule public static TestRule classRules = RuleChain - .outerRule(suiteFailureMarker = new TestRuleMarkFailure()) + .outerRule(new TestRuleIgnoreTestSuites()) + .around(suiteFailureMarker = new TestRuleMarkFailure()) .around(new TestRuleAssertionsRequired()) .around(new TestRuleNoStaticHooksShadowing()) .around(new TestRuleNoInstanceHooksOverrides()) diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreTestSuites.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreTestSuites.java new file mode 100644 index 00000000000..58fa30cad1f --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreTestSuites.java @@ -0,0 +1,67 @@ +package org.apache.lucene.util; + +import org.junit.Assume; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runners.model.Statement; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * This rule will cause the suite to be assumption-ignored if + * the test class implements a given marker interface and a special + * property is not set. + * + *

This is a workaround for problems with certain JUnit containers (IntelliJ) + * which automatically discover test suites and attempt to run nested classes + * that we use for testing the test framework itself. + */ +public final class TestRuleIgnoreTestSuites implements TestRule { + /** + * Marker interface for nested suites that should be ignored + * if executed in stand-alone mode. + */ + public static interface NestedTestSuite {} + + /** + * A boolean system property indicating nested suites should be executed + * normally. + */ + public final static String PROPERTY_RUN_NESTED = "tests.runnested"; + + @Override + public Statement apply(final Statement s, final Description d) { + return new Statement() { + @Override + public void evaluate() throws Throwable { + if (NestedTestSuite.class.isAssignableFrom(d.getTestClass())) { + LuceneTestCase.assumeTrue("Nested suite class ignored (started as stand-alone).", + isRunningNested()); + } + s.evaluate(); + } + }; + } + + /** + * Check if a suite class is running as a nested test. + */ + public static boolean isRunningNested() { + return Boolean.getBoolean(PROPERTY_RUN_NESTED); + } +} From 0b1d814c94f05c0bfdbec074946dcf2465a3ff28 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 18 May 2012 12:28:21 +0000 Subject: [PATCH 30/47] SOLR-3454: Exception when using result grouping with main=true and using wt=javabin git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340080 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../java/org/apache/solr/search/Grouping.java | 2 +- .../org/apache/solr/TestGroupingSearch.java | 139 +++++++++++------- 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c796893b578..f9ce07b8939 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -425,6 +425,9 @@ Bug Fixes * SOLR-3436: Group count incorrect when not all shards are queried in the second pass. (Francois Perron, Martijn van Groningen) +* SOLR-3454: Exception when using result grouping with main=true and using + wt=javabin. (Ludovic Boutros, Martijn van Groningen) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java index a9e64e4bb94..54ec934b6b4 100755 --- a/solr/core/src/java/org/apache/solr/search/Grouping.java +++ b/solr/core/src/java/org/apache/solr/search/Grouping.java @@ -651,7 +651,7 @@ public class Grouping { } } - int len = docsGathered - offset; + int len = docsGathered > offset ? 
docsGathered - offset : 0; int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()])); float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()])); DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore); diff --git a/solr/core/src/test/org/apache/solr/TestGroupingSearch.java b/solr/core/src/test/org/apache/solr/TestGroupingSearch.java index 402aa0c0124..72a1de30f8d 100644 --- a/solr/core/src/test/org/apache/solr/TestGroupingSearch.java +++ b/solr/core/src/test/org/apache/solr/TestGroupingSearch.java @@ -20,13 +20,21 @@ package org.apache.solr; import org.apache.lucene.search.FieldCache; import org.apache.noggit.JSONUtil; import org.apache.noggit.ObjectBuilder; +import org.apache.solr.client.solrj.impl.BinaryResponseParser; +import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.GroupParams; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.response.BinaryResponseWriter; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.util.*; public class TestGroupingSearch extends SolrTestCaseJ4 { @@ -37,7 +45,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { @BeforeClass public static void beforeTests() throws Exception { - initCore("solrconfig.xml","schema12.xml"); + initCore("solrconfig.xml", "schema12.xml"); } @Before @@ -75,18 +83,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']" ); - assertQ(req("q","title:title", "group", "true", "group.field","group_si") - ,"//lst[@name='grouped']/lst[@name='group_si']" - ,"*[count(//arr[@name='groups']/lst) = 2]" + assertQ(req("q", "title:title", "group", "true", "group.field", "group_si") + , "//lst[@name='grouped']/lst[@name='group_si']" + , "*[count(//arr[@name='groups']/lst) = 2]" - ,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']" - ,"//arr[@name='groups']/lst[1]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" + , "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']" + , "//arr[@name='groups']/lst[1]/result[@numFound='2']" + , "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" - ,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']" - ,"//arr[@name='groups']/lst[2]/result[@numFound='3']" - ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']" - ); + , "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']" + , "//arr[@name='groups']/lst[2]/result[@numFound='3']" + , "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']" + ); } @Test @@ -117,19 +125,19 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']" ); - assertQ(req("q","title:title", "group", "true", "group.field","group_si", "group.ngroups", "true") - ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']" - ,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']" - ,"*[count(//arr[@name='groups']/lst) = 2]" + assertQ(req("q", "title:title", "group", "true", "group.field", "group_si", "group.ngroups", "true") + , 
"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']" + , "//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']" + , "*[count(//arr[@name='groups']/lst) = 2]" - ,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']" - ,"//arr[@name='groups']/lst[1]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" + , "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']" + , "//arr[@name='groups']/lst[1]/result[@numFound='2']" + , "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" - ,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']" - ,"//arr[@name='groups']/lst[2]/result[@numFound='3']" - ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']" - ); + , "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']" + , "//arr[@name='groups']/lst[2]/result[@numFound='3']" + , "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']" + ); } @Test @@ -141,22 +149,22 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { assertU(add(doc("id", "5","name", "author3", "title", "the title of a title"))); assertU(commit()); - assertQ(req("q","title:title", "group", "true", "group.field","name", "sort", "score desc", "group.sort", "score desc") - ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']" - // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[1]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" + assertQ(req("q", "title:title", "group", "true", "group.field", "name", "sort", "score desc", "group.sort", "score desc") + , "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']" + // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']" + , "//arr[@name='groups']/lst[1]/result[@numFound='2']" + , "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']" - ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']" - // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[2]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']" + , "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']" + // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']" + , "//arr[@name='groups']/lst[2]/result[@numFound='2']" + , "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']" - ,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']" - // ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']" - ,"//arr[@name='groups']/lst[3]/result[@numFound='1']" - ,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']" - ); + , "//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']" + // ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']" + , "//arr[@name='groups']/lst[3]/result[@numFound='1']" + , "//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']" + ); } @@ -168,18 +176,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { assertU(add(doc("id", "4","name", "author2", "weight", "0.11"))); assertU(commit()); - assertQ(req("q","*:*", "group", "true", "group.field","name", "sort", "id asc", "group.sort", "weight desc") - ,"*[count(//arr[@name='groups']/lst) = 2]" - ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']" - // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[1]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']" + 
assertQ(req("q", "*:*", "group", "true", "group.field", "name", "sort", "id asc", "group.sort", "weight desc") + , "*[count(//arr[@name='groups']/lst) = 2]" + , "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']" + // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']" + , "//arr[@name='groups']/lst[1]/result[@numFound='2']" + , "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']" - ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']" - // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[2]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']" - ); + , "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']" + // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']" + , "//arr[@name='groups']/lst[2]/result[@numFound='2']" + , "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']" + ); } @Test @@ -214,6 +222,37 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ); } + @Test + public void testGroupingSimpleFormatArrayIndexOutOfBoundsExceptionWithJavaBin() throws Exception { + assertU(add(doc("id", "1", "nullfirst", "1"))); + assertU(add(doc("id", "2", "nullfirst", "1"))); + assertU(add(doc("id", "3", "nullfirst", "2"))); + assertU(add(doc("id", "4", "nullfirst", "2"))); + assertU(add(doc("id", "5", "nullfirst", "2"))); + assertU(add(doc("id", "6", "nullfirst", "3"))); + assertU(commit()); + + SolrQueryRequest request = + req("q", "*:*","group", "true", "group.field", "nullfirst", "group.main", "true", "wt", "javabin", "start", "4", "rows", "10"); + + SolrQueryResponse response = new SolrQueryResponse(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try { + SolrRequestInfo.setRequestInfo(new SolrRequestInfo(request, response)); + String handlerName = request.getParams().get(CommonParams.QT); + h.getCore().execute(h.getCore().getRequestHandler(handlerName), request, response); + BinaryResponseWriter responseWriter = new BinaryResponseWriter(); + responseWriter.write(out, request, response); + } finally { + request.close(); + SolrRequestInfo.clearRequestInfo(); + } + + assertEquals(6, ((ResultContext) response.getValues().get("response")).docs.matches()); + new BinaryResponseParser().processResponse(new ByteArrayInputStream(out.toByteArray()), ""); + out.close(); + } + @Test public void testGroupingWithTimeAllowed() throws Exception { assertU(add(doc("id", "1"))); @@ -530,9 +569,9 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ); ///////////////////////// group.format == simple - assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.format","simple") - , "/grouped/foo_i=={'matches':10,'doclist':" - +"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}" + assertJQ(req("fq", filt, "q", "{!func}" + f2, "group", "true", "group.field", f, "fl", "id", "rows", "3", "start", "1", "group.limit", "2", "group.format", "simple") + , "/grouped/foo_i=={'matches':10,'doclist':" + + "{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}" ); } From 5efed3447ef7ffc65b7c33db6992e236311fb340 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Fri, 18 May 2012 16:58:38 +0000 Subject: [PATCH 31/47] LUCENE-3983: HTMLStripCharFilter: Stop upcasing HTML character entity names at class initialization time; instead, provide hard-coded upcased versions for a small set of them. 
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340169 13f79535-47bb-0310-9956-ffa450edef68 --- .../charfilter/HTMLCharacterEntities.jflex | 17 +++++++++--- .../charfilter/HTMLStripCharFilter.java | 26 +++++++++++++------ .../charfilter/HTMLStripCharFilter.jflex | 3 ++- .../lucene/analysis/charfilter/htmlentity.py | 17 +++++++++--- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex index b4f73a61411..a32e1480828 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex @@ -62,8 +62,16 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha" | "weierp" | "xi" | "yacute" | "yen" | "yuml" | "zeta" | "zwj" | "zwnj" ) %{ - private static final Set upperCaseVariantsAccepted - = new HashSet(Arrays.asList("quot","copy","gt","lt","reg","amp")); + private static final Map upperCaseVariantsAccepted + = new HashMap(); + static { + upperCaseVariantsAccepted.put("quot", "QUOT"); + upperCaseVariantsAccepted.put("copy", "COPY"); + upperCaseVariantsAccepted.put("gt", "GT"); + upperCaseVariantsAccepted.put("lt", "LT"); + upperCaseVariantsAccepted.put("reg", "REG"); + upperCaseVariantsAccepted.put("amp", "AMP"); + } private static final CharArrayMap entityValues = new CharArrayMap(Version.LUCENE_40, 253, false); static { @@ -145,8 +153,9 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha" for (int i = 0 ; i < entities.length ; i += 2) { Character value = entities[i + 1].charAt(0); entityValues.put(entities[i], value); - if (upperCaseVariantsAccepted.contains(entities[i])) { - entityValues.put(entities[i].toUpperCase(), value); + String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]); + if (upperCaseVariant != null) { + entityValues.put(upperCaseVariant, value); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java index 949110284e9..2b83aa017e4 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/24/12 4:50 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */ package org.apache.lucene.analysis.charfilter; @@ -21,7 +21,8 @@ package org.apache.lucene.analysis.charfilter; import java.io.IOException; import java.util.Arrays; -import java.util.HashSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import org.apache.lucene.util.Version; @@ -39,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 3/24/12 4:50 PM from the specification file - * C:/cygwin/home/s/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex + * on 5/18/12 12:24 PM from the specification file + * C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex 
*/ public final class HTMLStripCharFilter extends BaseCharFilter { @@ -30522,8 +30523,16 @@ public final class HTMLStripCharFilter extends BaseCharFilter { private boolean zzEOFDone; /* user code: */ - private static final Set upperCaseVariantsAccepted - = new HashSet(Arrays.asList("quot","copy","gt","lt","reg","amp")); + private static final Map upperCaseVariantsAccepted + = new HashMap(); + static { + upperCaseVariantsAccepted.put("quot", "QUOT"); + upperCaseVariantsAccepted.put("copy", "COPY"); + upperCaseVariantsAccepted.put("gt", "GT"); + upperCaseVariantsAccepted.put("lt", "LT"); + upperCaseVariantsAccepted.put("reg", "REG"); + upperCaseVariantsAccepted.put("amp", "AMP"); + } private static final CharArrayMap entityValues = new CharArrayMap(Version.LUCENE_40, 253, false); static { @@ -30605,8 +30614,9 @@ public final class HTMLStripCharFilter extends BaseCharFilter { for (int i = 0 ; i < entities.length ; i += 2) { Character value = entities[i + 1].charAt(0); entityValues.put(entities[i], value); - if (upperCaseVariantsAccepted.contains(entities[i])) { - entityValues.put(entities[i].toUpperCase(), value); + String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]); + if (upperCaseVariant != null) { + entityValues.put(upperCaseVariant, value); } } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex index 54d62cc9372..3c9116b8440 100755 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex @@ -19,7 +19,8 @@ package org.apache.lucene.analysis.charfilter; import java.io.IOException; import java.util.Arrays; -import java.util.HashSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import org.apache.lucene.util.Version; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py index b3300687556..ff9ee6bf3a1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py @@ -50,8 +50,16 @@ def main(): print output_line, ')' print '%{' - print ' private static final Set upperCaseVariantsAccepted' - print ' = new HashSet(Arrays.asList("quot","copy","gt","lt","reg","amp"));' + print ' private static final Map upperCaseVariantsAccepted' + print ' = new HashMap();' + print ' static {' + print ' upperCaseVariantsAccepted.put("quot", "QUOT");' + print ' upperCaseVariantsAccepted.put("copy", "COPY");' + print ' upperCaseVariantsAccepted.put("gt", "GT");' + print ' upperCaseVariantsAccepted.put("lt", "LT");' + print ' upperCaseVariantsAccepted.put("reg", "REG");' + print ' upperCaseVariantsAccepted.put("amp", "AMP");' + print ' }' print ' private static final CharArrayMap entityValues' print ' = new CharArrayMap(Version.LUCENE_40, %i, false);' % len(keys) print ' static {' @@ -68,8 +76,9 @@ def main(): print ' for (int i = 0 ; i < entities.length ; i += 2) {' print ' Character value = entities[i + 1].charAt(0);' print ' entityValues.put(entities[i], value);' - print ' if (upperCaseVariantsAccepted.contains(entities[i])) {' - print ' entityValues.put(entities[i].toUpperCase(), value);' + print ' String upperCaseVariant = 
upperCaseVariantsAccepted.get(entities[i]);' + print ' if (upperCaseVariant != null) {' + print ' entityValues.put(upperCaseVariant, value);' print ' }' print ' }' print " }" From a0493e557085fb21e41bf2d1a88fd476e7a4104e Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 19 May 2012 16:29:58 +0000 Subject: [PATCH 32/47] LUCENE-4070: assign output to member to prevent double opening on error git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340514 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/store/CompoundFileWriter.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java index 8134b8cba47..a5e3d1cfb04 100644 --- a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java +++ b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java @@ -118,16 +118,14 @@ final class CompoundFileWriter implements Closeable{ private synchronized IndexOutput getOutput() throws IOException { if (dataOut == null) { - IndexOutput dataOutput = null; boolean success = false; try { - dataOutput = directory.createOutput(dataFileName, IOContext.DEFAULT); - dataOutput.writeVInt(FORMAT_CURRENT); - dataOut = dataOutput; + dataOut = directory.createOutput(dataFileName, IOContext.DEFAULT); + dataOut.writeVInt(FORMAT_CURRENT); success = true; } finally { if (!success) { - IOUtils.closeWhileHandlingException(dataOutput); + IOUtils.closeWhileHandlingException(dataOut); } } } From 419a026c0629a7ea08e050d289f6ac1ad45759d1 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 19 May 2012 17:24:36 +0000 Subject: [PATCH 33/47] fix over-copying of CFX files during addIndexes(Directory[]) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340521 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/index/IndexWriter.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 06b5e771fad..28d16a8ef4f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2159,6 +2159,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { sis.read(dir); final Set dsFilesCopied = new HashSet(); final Map dsNames = new HashMap(); + final Set copiedFiles = new HashSet(); + for (SegmentInfo info : sis) { assert !infos.contains(info): "dup info dir=" + info.dir + " name=" + info.name; @@ -2171,7 +2173,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { IOContext context = new IOContext(new MergeInfo(info.docCount, info.sizeInBytes(), true, -1)); - copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context); + copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles); infos.add(info); } @@ -2282,7 +2284,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { /** Copies the segment files as-is into the IndexWriter's directory. */ private void copySegmentAsIs(SegmentInfo info, String segName, - Map dsNames, Set dsFilesCopied, IOContext context) + Map dsNames, Set dsFilesCopied, IOContext context, + Set copiedFiles) throws IOException { // Determine if the doc store of this segment needs to be copied. 
It's // only relevant for segments that share doc store with others, @@ -2301,10 +2304,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { Set codecDocStoreFiles = new HashSet(); if (info.getDocStoreOffset() != -1) { // only violate the codec this way if its preflex - codec.storedFieldsFormat().files(info, codecDocStoreFiles); - codec.termVectorsFormat().files(info, codecDocStoreFiles); + info.getCodec().storedFieldsFormat().files(info, codecDocStoreFiles); + info.getCodec().termVectorsFormat().files(info, codecDocStoreFiles); } - + // Copy the segment files for (String file: info.files()) { final String newFileName; @@ -2319,6 +2322,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists"; + assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once"; + copiedFiles.add(file); info.dir.copy(directory, file, newFileName, context); } From 80ed49893cd992efdb4e76e45f8ac1555be57fe7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 19 May 2012 17:47:38 +0000 Subject: [PATCH 34/47] test untested query, fix broken equals() impl git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340526 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/queries/function/BoostedQuery.java | 7 +- .../queries/function/TestBoostedQuery.java | 99 +++++++++++++++++++ 2 files changed, 103 insertions(+), 3 deletions(-) create mode 100644 lucene/queries/src/test/org/apache/lucene/queries/function/TestBoostedQuery.java diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java index c0c2942ba33..91d98321d3e 100755 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java @@ -31,6 +31,8 @@ import java.util.Map; /** * Query that is boosted by a ValueSource */ +// TODO: BoostedQuery and BoostingQuery in the same module? 
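The equals() hunk below (patch 34) relies on the Lucene 4.x convention that Query.equals() already performs the identity, null, runtime-class and boost comparisons; the version being replaced called getClass() on its argument directly, so equals(null) threw a NullPointerException instead of returning false. A self-contained sketch of that layering with hypothetical class names (it deliberately models, rather than extends, the real Query):

```java
// Base class owns the generic part of the contract: class identity and boost.
class BaseQuery {
  private final float boost;

  BaseQuery(float boost) {
    this.boost = boost;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    return Float.floatToIntBits(boost) == Float.floatToIntBits(((BaseQuery) o).boost);
  }

  @Override
  public int hashCode() {
    return Float.floatToIntBits(boost);
  }
}

// Subclass delegates to super.equals() and compares only its own fields.
final class WrappedQuery extends BaseQuery {
  private final String wrapped;

  WrappedQuery(String wrapped, float boost) {
    super(boost);
    this.wrapped = wrapped;
  }

  @Override
  public boolean equals(Object o) {
    if (!super.equals(o)) return false; // null, class and boost already handled
    return wrapped.equals(((WrappedQuery) o).wrapped);
  }

  @Override
  public int hashCode() {
    return 31 * super.hashCode() + wrapped.hashCode();
  }
}
```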
+// something has to give public class BoostedQuery extends Query { private Query q; private ValueSource boostVal; // optional, can be null @@ -187,10 +189,9 @@ public class BoostedQuery extends Query { @Override public boolean equals(Object o) { - if (getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; BoostedQuery other = (BoostedQuery)o; - return this.getBoost() == other.getBoost() - && this.q.equals(other.q) + return this.q.equals(other.q) && this.boostVal.equals(other.boostVal); } diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestBoostedQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestBoostedQuery.java new file mode 100644 index 00000000000..5a9a0c2dd83 --- /dev/null +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestBoostedQuery.java @@ -0,0 +1,99 @@ +package org.apache.lucene.queries.function; + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queries.function.valuesource.ConstValueSource; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Basic tests for {@link BoostedQuery} + */ +// TODO: more tests +public class TestBoostedQuery extends LuceneTestCase { + static Directory dir; + static IndexReader ir; + static IndexSearcher is; + + @BeforeClass + public static void beforeClass() throws Exception { + dir = newDirectory(); + IndexWriterConfig iwConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwConfig.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig); + Document document = new Document(); + Field idField = new StringField("id", ""); + document.add(idField); + iw.addDocument(document); + ir = iw.getReader(); + is = newSearcher(ir); + iw.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + is = null; + ir.close(); + ir = null; + dir.close(); + dir = null; + } + + public void testBasic() throws Exception { + Query q = new MatchAllDocsQuery(); + TopDocs docs = is.search(q, 10); + assertEquals(1, docs.totalHits); + float score = docs.scoreDocs[0].score; + + Query boostedQ = new BoostedQuery(q, new ConstValueSource(2.0f)); + assertHits(boostedQ, new float[] { score*2 }); + } + + void assertHits(Query q, float scores[]) throws Exception { + ScoreDoc expected[] = new ScoreDoc[scores.length]; + int expectedDocs[] = new int[scores.length]; + for (int i = 0; i < expected.length; i++) { + expectedDocs[i] = i; + expected[i] = new ScoreDoc(i, scores[i]); + } + TopDocs docs = is.search(q, 10, + new Sort(new SortField("id", SortField.Type.STRING))); + CheckHits.checkHits(random(), q, "", is, expectedDocs); + CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs); + CheckHits.checkExplanations(q, "", is); + } +} From fa75b33949d18473ee5122cfd59b6a20d7a3c2a9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 20 May 2012 10:03:35 +0000 Subject: [PATCH 35/47] LUCENE-4046: Add IOException to DocsEnum#freq() git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340653 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/MappingMultiDocsAndPositionsEnum.java | 2 +- .../org/apache/lucene/codecs/MappingMultiDocsEnum.java | 2 +- .../org/apache/lucene/codecs/lucene3x/Lucene3xFields.java | 4 ++-- .../lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java | 4 ++-- .../lucene/codecs/lucene40/Lucene40PostingsReader.java | 6 +++--- .../lucene/codecs/lucene40/Lucene40TermVectorsReader.java | 4 ++-- .../apache/lucene/codecs/memory/MemoryPostingsFormat.java | 4 ++-- .../lucene/codecs/pulsing/PulsingPostingsReader.java | 4 ++-- .../org/apache/lucene/codecs/sep/SepPostingsReader.java | 4 ++-- .../lucene/codecs/simpletext/SimpleTextFieldsReader.java | 4 ++-- .../codecs/simpletext/SimpleTextTermVectorsReader.java | 4 ++-- lucene/core/src/java/org/apache/lucene/index/DocsEnum.java | 7 +++++-- .../java/org/apache/lucene/index/FilterAtomicReader.java | 4 ++-- .../org/apache/lucene/index/MultiDocsAndPositionsEnum.java | 2 +- .../src/java/org/apache/lucene/index/MultiDocsEnum.java | 2 +- .../java/org/apache/lucene/search/MultiPhraseQuery.java | 2 +- .../core/src/java/org/apache/lucene/search/TermScorer.java | 4 ++-- .../java/org/apache/lucene/index/memory/MemoryIndex.java | 4 ++-- .../lucene/codecs/ramonly/RAMOnlyPostingsFormat.java | 4 ++-- 19 files changed, 37 insertions(+), 34 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java index 
e4e89bf84ce..683b2c30e54 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java @@ -53,7 +53,7 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum } @Override - public int freq() { + public int freq() throws IOException { return current.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java index 0319e1da1ce..8a9bb79525b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java @@ -52,7 +52,7 @@ public final class MappingMultiDocsEnum extends DocsEnum { } @Override - public int freq() { + public int freq() throws IOException { return current.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java index 621fe44f481..3d153d60a11 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java @@ -1024,7 +1024,7 @@ class Lucene3xFields extends FieldsProducer { } @Override - public int freq() { + public int freq() throws IOException { return docs.freq(); } @@ -1071,7 +1071,7 @@ class Lucene3xFields extends FieldsProducer { } @Override - public int freq() { + public int freq() throws IOException { return pos.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index c66e0584dee..1df5ca960bc 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -528,7 +528,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader { private Bits liveDocs; @Override - public int freq() { + public int freq() throws IOException { return freq; } @@ -574,7 +574,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader { private int[] endOffsets; @Override - public int freq() { + public int freq() throws IOException { if (positions != null) { return positions.length; } else { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 0eb36aeff96..7bf74d7a9a2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -351,7 +351,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } @Override - public final int freq() { + public final int freq() throws IOException { assert !indexOmitsTF; return freq; } @@ -770,7 +770,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { return freq; } @@ -989,7 +989,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { return freq; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index c0420d1ba95..e44713b65ab 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -549,7 +549,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader { private Bits liveDocs; @Override - public int freq() { + public int freq() throws IOException { return freq; } @@ -595,7 +595,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader { private int[] endOffsets; @Override - public int freq() { + public int freq() throws IOException { if (positions != null) { return positions.length; } else { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index 8badc9ba7e3..1453252514e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -424,7 +424,7 @@ public class MemoryPostingsFormat extends PostingsFormat { } @Override - public int freq() { + public int freq() throws IOException { assert indexOptions != IndexOptions.DOCS_ONLY; return freq; } @@ -624,7 +624,7 @@ public class MemoryPostingsFormat extends PostingsFormat { } @Override - public int freq() { + public int freq() throws IOException { return freq; } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java index 18bd58867f0..644e48511b2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java @@ -356,7 +356,7 @@ public class PulsingPostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { assert indexOptions != IndexOptions.DOCS_ONLY; return freq; } @@ -462,7 +462,7 @@ public class PulsingPostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { return freq; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java index 533fd2f3b22..6424fe3b9d5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java @@ -423,7 +423,7 @@ public class SepPostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { assert !omitTF; return freq; } @@ -601,7 +601,7 @@ public class SepPostingsReader extends PostingsReaderBase { } @Override - public int freq() { + public int freq() throws IOException { return freq; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index c3b252df204..ec04e9a64c4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -269,7 +269,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } @Override - public int freq() { + public int freq() 
throws IOException { assert !omitTF; return tf; } @@ -370,7 +370,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } @Override - public int freq() { + public int freq() throws IOException { return tf; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 03d6825f1a7..c7c8dc7563d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -400,7 +400,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { private Bits liveDocs; @Override - public int freq() { + public int freq() throws IOException { assert freq != -1; return freq; } @@ -447,7 +447,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { private int[] endOffsets; @Override - public int freq() { + public int freq() throws IOException { if (positions != null) { return positions.length; } else { diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java index e8d97ad5fc5..7b48dd0645c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocsEnum.java @@ -17,6 +17,8 @@ package org.apache.lucene.index; * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.AttributeSource; @@ -29,8 +31,9 @@ public abstract class DocsEnum extends DocIdSetIterator { /** Returns term frequency in the current document. Do * not call this before {@link #nextDoc} is first called, - * nor after {@link #nextDoc} returns NO_MORE_DOCS. */ - public abstract int freq(); + * nor after {@link #nextDoc} returns NO_MORE_DOCS. + **/ + public abstract int freq() throws IOException; /** Returns the related attributes. 
*/ public AttributeSource attributes() { diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index cc67c40e5f1..e0c4b58b87f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -225,7 +225,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public int freq() { + public int freq() throws IOException { return in.freq(); } @@ -259,7 +259,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public int freq() { + public int freq() throws IOException { return in.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java index dd6f2634615..e515a2d3e85 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java @@ -69,7 +69,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum { } @Override - public int freq() { + public int freq() throws IOException { return current.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java index c90ab1d7a40..2597c7ced81 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java @@ -69,7 +69,7 @@ public final class MultiDocsEnum extends DocsEnum { } @Override - public int freq() { + public int freq() throws IOException { return current.freq(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 2dbc77e27cb..e008c197918 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -568,7 +568,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { } @Override - public final int freq() { + public final int freq() throws IOException { return _freq; } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index e5f93d13daa..ba638a5bbc5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -51,7 +51,7 @@ final class TermScorer extends Scorer { } @Override - public float freq() { + public float freq() throws IOException { return docsEnum.freq(); } @@ -66,7 +66,7 @@ final class TermScorer extends Scorer { } @Override - public float score() { + public float score() throws IOException { assert docID() != NO_MORE_DOCS; return docScorer.score(docsEnum.docID(), docsEnum.freq()); } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index d6615bcb31d..977a87d287f 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -945,7 +945,7 @@ public class MemoryIndex { } @Override - public int freq() { + public int freq() throws IOException { return positions.size(); } } @@ -987,7 +987,7 @@ public class MemoryIndex { 
} @Override - public int freq() { + public int freq() throws IOException { return positions.size() / stride; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index f23d0fc35e1..896f72a2bd5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -439,7 +439,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat { } @Override - public int freq() { + public int freq() throws IOException { return current.positions.length; } @@ -487,7 +487,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat { } @Override - public int freq() { + public int freq() throws IOException { return current.positions.length; } From d722b19748a2c1dabc3b7ff5fb16b4c6e3d65d75 Mon Sep 17 00:00:00 2001 From: Stefan Matheis Date: Sun, 20 May 2012 10:32:38 +0000 Subject: [PATCH 36/47] SOLR-3238: Add License for d3 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340665 13f79535-47bb-0310-9956-ffa450edef68 --- solr/webapp/web/js/lib/d3.js | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/solr/webapp/web/js/lib/d3.js b/solr/webapp/web/js/lib/d3.js index c1b6caf261e..c29ff9c9680 100755 --- a/solr/webapp/web/js/lib/d3.js +++ b/solr/webapp/web/js/lib/d3.js @@ -1,3 +1,34 @@ +/* + +Copyright (c) 2012, Michael Bostock +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* The name Michael Bostock may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + (function(){if (!Date.now) Date.now = function() { return +new Date; }; From d65f086bafa9fc1d32dcd7ba412ec43f41d8608e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Osi=C5=84ski?= Date: Sun, 20 May 2012 11:53:51 +0000 Subject: [PATCH 37/47] SOLR-3470: Custom Carrot2 tokenizer and stemmer factories overwritten by defaults: fixed git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340686 13f79535-47bb-0310-9956-ffa450edef68 --- .../carrot2/CarrotClusteringEngine.java | 13 +++- .../clustering/solr/conf/solrconfig.xml | 10 +++ .../carrot2/CarrotClusteringEngineTest.java | 28 +++++++ .../carrot2/DuplicatingStemmerFactory.java | 34 +++++++++ .../carrot2/DuplicatingTokenizerFactory.java | 52 +++++++++++++ .../carrot2/EchoStemsClusteringAlgorithm.java | 75 +++++++++++++++++++ .../EchoTokensClusteringAlgorithm.java | 69 +++++++++++++++++ 7 files changed, 277 insertions(+), 4 deletions(-) create mode 100644 solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingStemmerFactory.java create mode 100644 solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingTokenizerFactory.java create mode 100644 solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoStemsClusteringAlgorithm.java create mode 100644 solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoTokensClusteringAlgorithm.java diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index df62be8f59c..edaffe18db5 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -62,6 +62,7 @@ import org.carrot2.core.LanguageCode; import org.carrot2.core.attribute.AttributeNames; import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor; import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor; +import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder; import org.carrot2.util.resource.ClassLoaderLocator; import org.carrot2.util.resource.IResource; import org.carrot2.util.resource.IResourceLocator; @@ -255,10 +256,14 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { // Additionally, we set a custom lexical resource factory for Carrot2 that // will use both Carrot2 default stop words as well as stop words from // the StopFilter defined on the field. 
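The lines that follow implement the SOLR-3470 fix: install the built-in tokenizer and stemmer factories only when the caller has not already configured those keys, where the old code overwrote them unconditionally. The guard generalizes to any defaults-plus-overrides map; a small sketch with placeholder keys and values (not the Carrot2 attribute names):

```java
import java.util.HashMap;
import java.util.Map;

final class AttributeDefaults {
  // Add a default only where the user left the key unset.
  static void applyDefaults(Map<String, Object> initAttributes) {
    if (!initAttributes.containsKey("tokenizer.factory")) {
      initAttributes.put("tokenizer.factory", "DefaultTokenizerFactory");
    }
    if (!initAttributes.containsKey("stemmer.factory")) {
      initAttributes.put("stemmer.factory", "DefaultStemmerFactory");
    }
  }

  public static void main(String[] args) {
    Map<String, Object> attrs = new HashMap<String, Object>();
    attrs.put("stemmer.factory", "MyCustomStemmerFactory"); // user-supplied
    applyDefaults(attrs);
    // Prints the custom stemmer plus the default tokenizer: the user's
    // choice survives, and only the missing key receives a default.
    System.out.println(attrs);
  }
}
```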
- BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes) - .stemmerFactory(LuceneCarrot2StemmerFactory.class) - .tokenizerFactory(LuceneCarrot2TokenizerFactory.class) - .lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class); + final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes); + attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class); + if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) { + attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class); + } + if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) { + attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class); + } // Pass the schema to SolrStopwordsCarrot2LexicalDataFactory. initAttributes.put("solrIndexSchema", core.getSchema()); diff --git a/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml b/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml index 41787f82644..430c0616564 100644 --- a/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml +++ b/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml @@ -339,6 +339,16 @@ org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm clustering/custom + + custom-duplicating-tokenizer + org.apache.solr.handler.clustering.carrot2.EchoTokensClusteringAlgorithm + org.apache.solr.handler.clustering.carrot2.DuplicatingTokenizerFactory + + + custom-duplicating-stemmer + org.apache.solr.handler.clustering.carrot2.EchoStemsClusteringAlgorithm + org.apache.solr.handler.clustering.carrot2.DuplicatingStemmerFactory + diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java index e4ef997f3d5..911f03d787d 100644 --- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java @@ -352,6 +352,34 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase { assertEquals("List field", "[first, second]", labels.get(4)); } + @Test + public void customTokenizer() throws Exception { + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(CarrotParams.TITLE_FIELD_NAME, "title"); + params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet"); + + final List labels = getLabels(checkEngine( + getClusteringEngine("custom-duplicating-tokenizer"), 1, 16, new TermQuery(new Term("title", + "field")), params).get(0)); + + // The custom test tokenizer duplicates each token's text + assertTrue("First token", labels.get(0).contains("TitleTitle")); + } + + @Test + public void customStemmer() throws Exception { + final ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(CarrotParams.TITLE_FIELD_NAME, "title"); + params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet"); + + final List labels = getLabels(checkEngine( + getClusteringEngine("custom-duplicating-stemmer"), 1, 12, new TermQuery(new Term("title", + "field")), params).get(0)); + + // The custom test stemmer duplicates and lowercases each token's text + assertTrue("First token", labels.get(0).contains("titletitle")); + } + private CarrotClusteringEngine 
getClusteringEngine(String engineName) { ClusteringComponent comp = (ClusteringComponent) h.getCore() .getSearchComponent("clustering"); diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingStemmerFactory.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingStemmerFactory.java new file mode 100644 index 00000000000..c090a1567e1 --- /dev/null +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingStemmerFactory.java @@ -0,0 +1,34 @@ +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.carrot2.core.LanguageCode; +import org.carrot2.text.linguistic.IStemmer; +import org.carrot2.text.linguistic.IStemmerFactory; + +public class DuplicatingStemmerFactory implements IStemmerFactory { + @Override + public IStemmer getStemmer(LanguageCode language) { + return new IStemmer() { + @Override + public CharSequence stem(CharSequence word) { + return word.toString() + word.toString(); + } + }; + } +} diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingTokenizerFactory.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingTokenizerFactory.java new file mode 100644 index 00000000000..99e6b605c91 --- /dev/null +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingTokenizerFactory.java @@ -0,0 +1,52 @@ +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.carrot2.core.LanguageCode; +import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer; +import org.carrot2.text.analysis.ITokenizer; +import org.carrot2.text.linguistic.ITokenizerFactory; +import org.carrot2.text.util.MutableCharArray; + +public class DuplicatingTokenizerFactory implements ITokenizerFactory { + @Override + public ITokenizer getTokenizer(LanguageCode language) { + return new ITokenizer() { + private final ExtendedWhitespaceTokenizer delegate = new ExtendedWhitespaceTokenizer(); + + @Override + public void setTermBuffer(MutableCharArray buffer) { + delegate.setTermBuffer(buffer); + buffer.reset(buffer.toString() + buffer.toString()); + } + + @Override + public void reset(Reader input) throws IOException { + delegate.reset(input); + } + + @Override + public short nextToken() throws IOException { + return delegate.nextToken(); + } + }; + } +} diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoStemsClusteringAlgorithm.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoStemsClusteringAlgorithm.java new file mode 100644 index 00000000000..52d8e054230 --- /dev/null +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoStemsClusteringAlgorithm.java @@ -0,0 +1,75 @@ +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.List; + +import org.carrot2.core.Cluster; +import org.carrot2.core.Document; +import org.carrot2.core.IClusteringAlgorithm; +import org.carrot2.core.LanguageCode; +import org.carrot2.core.ProcessingComponentBase; +import org.carrot2.core.ProcessingException; +import org.carrot2.core.attribute.AttributeNames; +import org.carrot2.core.attribute.Processing; +import org.carrot2.text.preprocessing.PreprocessingContext; +import org.carrot2.text.preprocessing.PreprocessingContext.AllStems; +import org.carrot2.text.preprocessing.PreprocessingContext.AllTokens; +import org.carrot2.text.preprocessing.PreprocessingContext.AllWords; +import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline; +import org.carrot2.util.attribute.Attribute; +import org.carrot2.util.attribute.Bindable; +import org.carrot2.util.attribute.Input; +import org.carrot2.util.attribute.Output; + +import com.google.common.collect.Lists; + +/** + * A mock Carrot2 clustering algorithm that outputs stem of each token of each + * document as a separate cluster. Useful only in tests. 
+ */ +@Bindable(prefix = "EchoStemsClusteringAlgorithm") +public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase + implements IClusteringAlgorithm { + @Input + @Processing + @Attribute(key = AttributeNames.DOCUMENTS) + private List documents; + + @Output + @Processing + @Attribute(key = AttributeNames.CLUSTERS) + private List clusters; + + BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline(); + + @Override + public void process() throws ProcessingException { + final PreprocessingContext preprocessingContext = preprocessing.preprocess( + documents, "", LanguageCode.ENGLISH); + final AllTokens allTokens = preprocessingContext.allTokens; + final AllWords allWords = preprocessingContext.allWords; + final AllStems allStems = preprocessingContext.allStems; + clusters = Lists.newArrayListWithCapacity(allTokens.image.length); + for (int i = 0; i < allTokens.image.length; i++) { + if (allTokens.wordIndex[i] >= 0) { + clusters.add(new Cluster(new String( + allStems.image[allWords.stemIndex[allTokens.wordIndex[i]]]))); + } + } + } +} diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoTokensClusteringAlgorithm.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoTokensClusteringAlgorithm.java new file mode 100644 index 00000000000..2ed2d95dbf5 --- /dev/null +++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoTokensClusteringAlgorithm.java @@ -0,0 +1,69 @@ +package org.apache.solr.handler.clustering.carrot2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.List; + +import org.carrot2.core.Cluster; +import org.carrot2.core.Document; +import org.carrot2.core.IClusteringAlgorithm; +import org.carrot2.core.LanguageCode; +import org.carrot2.core.ProcessingComponentBase; +import org.carrot2.core.ProcessingException; +import org.carrot2.core.attribute.AttributeNames; +import org.carrot2.core.attribute.Processing; +import org.carrot2.text.preprocessing.PreprocessingContext; +import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline; +import org.carrot2.util.attribute.Attribute; +import org.carrot2.util.attribute.Bindable; +import org.carrot2.util.attribute.Input; +import org.carrot2.util.attribute.Output; + +import com.google.common.collect.Lists; + +/** + * A mock Carrot2 clustering algorithm that outputs each token of each document + * as a separate cluster. Useful only in tests. 
+ */ +@Bindable(prefix = "EchoTokensClusteringAlgorithm") +public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase + implements IClusteringAlgorithm { + @Input + @Processing + @Attribute(key = AttributeNames.DOCUMENTS) + private List documents; + + @Output + @Processing + @Attribute(key = AttributeNames.CLUSTERS) + private List clusters; + + BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline(); + + @Override + public void process() throws ProcessingException { + final PreprocessingContext preprocessingContext = preprocessing.preprocess( + documents, "", LanguageCode.ENGLISH); + clusters = Lists + .newArrayListWithCapacity(preprocessingContext.allTokens.image.length); + for (char[] token : preprocessingContext.allTokens.image) { + if (token != null) { + clusters.add(new Cluster(new String(token))); + } + } + } +} From 664ab89dee64ee40d17ecb462ec6d0a996913cbb Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Sun, 20 May 2012 12:06:22 +0000 Subject: [PATCH 38/47] SOLR-3471: Disable tests that don't work correctly on Windows git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340687 13f79535-47bb-0310-9956-ffa450edef68 --- .../test/org/apache/solr/core/TestSolrDeletionPolicy1.java | 4 ++++ .../src/test/org/apache/solr/update/SoftAutoCommitTest.java | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/solr/core/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java b/solr/core/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java index 1242c3fb568..53cec4828c7 100644 --- a/solr/core/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java +++ b/solr/core/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java @@ -17,6 +17,7 @@ package org.apache.solr.core; import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.util.Constants; import org.apache.solr.SolrTestCaseJ4; import org.junit.Before; import org.junit.BeforeClass; @@ -109,6 +110,9 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 { @Test public void testCommitAge() throws InterruptedException { + assumeFalse("This test is not working on Windows (or maybe machines with only 2 CPUs)", + Constants.WINDOWS); + IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy(); addDocs(); Map commits = delPolicy.getCommits(); diff --git a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java index 8c2b3591a76..221f95a692b 100644 --- a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java +++ b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertEquals; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; +import org.apache.lucene.util.Constants; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrEventListener; @@ -62,6 +63,9 @@ public class SoftAutoCommitTest extends AbstractSolrTestCase { @Before public void createMonitor() throws Exception { + assumeFalse("This test is not working on Windows (or maybe machines with only 2 CPUs)", + Constants.WINDOWS); + SolrCore core = h.getCore(); updater = (DirectUpdateHandler2) core.getUpdateHandler(); From 8945e4b2ce2a66b01c1016628d91c527478e642c Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sun, 20 May 2012 13:41:33 +0000 Subject: [PATCH 39/47] don't excessively call RateLimiter in tests git-svn-id: 
https://svn.apache.org/repos/asf/lucene/dev/trunk@1340713 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/store/MockIndexOutputWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java index 9658110e006..1b9b1adee68 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java @@ -78,7 +78,7 @@ public class MockIndexOutputWrapper extends IndexOutput { long freeSpace = dir.maxSize == 0 ? 0 : dir.maxSize - dir.sizeInBytes(); long realUsage = 0; - if (dir.rateLimiter != null) { + if (dir.rateLimiter != null && len >= 10) { dir.rateLimiter.pause(len); } From 5c714e8147b2476c46bf3991de6913a4309c6846 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 20 May 2012 18:39:51 +0000 Subject: [PATCH 40/47] fix testcase to not check unreliable thread state in join helper git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340794 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/TestDocumentsWriterStallControl.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java b/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java index 0252c9f1aa6..46052131015 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java @@ -27,7 +27,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController; import org.apache.lucene.util.LuceneTestCase; -import com.carrotsearch.randomizedtesting.annotations.Repeat; import com.carrotsearch.randomizedtesting.annotations.ThreadLeaks; /** @@ -59,7 +58,7 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase { memCtrl.netBytes = 50; ctrl.updateStalled(memCtrl); assertFalse(ctrl.anyStalledThreads()); - join(waitThreads, 100); + join(waitThreads, 500); } public void testRandom() throws InterruptedException { @@ -297,8 +296,6 @@ public class TestDocumentsWriterStallControl extends LuceneTestCase { throws InterruptedException { for (Thread thread : toJoin) { thread.join(timeout); - assertEquals(thread.getState().toString(), Thread.State.TERMINATED, - thread.getState()); } } From abaec93967a7b10993de34870c4851c464c69c2a Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 20 May 2012 20:54:17 +0000 Subject: [PATCH 41/47] add a couple cloud props to multicore example solr.xml git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340837 13f79535-47bb-0310-9956-ffa450edef68 --- solr/example/multicore/solr.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/example/multicore/solr.xml b/solr/example/multicore/solr.xml index c6ed7e57983..ff97c67722b 100644 --- a/solr/example/multicore/solr.xml +++ b/solr/example/multicore/solr.xml @@ -28,7 +28,7 @@ adminPath: RequestHandler path to manage cores. 
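The assertion deleted from TestDocumentsWriterStallControl above was unreliable for a simple reason: Thread.join(millis) returns either because the thread terminated or because the timeout elapsed, so the state observed immediately after a timed join is not guaranteed to be TERMINATED. A standalone demonstration of the race (sleep durations are illustrative only):

```java
public final class TimedJoinDemo {
  public static void main(String[] args) throws InterruptedException {
    Thread worker = new Thread(new Runnable() {
      public void run() {
        try {
          Thread.sleep(200); // simulated work
        } catch (InterruptedException ignored) {
        }
      }
    });
    worker.start();

    worker.join(50); // may return before the worker finishes
    // Racy: can print TIMED_WAITING, RUNNABLE or TERMINATED.
    System.out.println("after timed join: " + worker.getState());

    worker.join(); // an untimed join is the reliable way to wait
    System.out.println("after untimed join: " + worker.getState()); // TERMINATED
  }
}
```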
If 'null' (or absent), cores will not be manageable via request handler --> - + From ff037852fc54d3bda26353c664799ac94df5af66 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 20 May 2012 22:49:35 +0000 Subject: [PATCH 42/47] add ping and analysis request handlers for good UI defaults git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340863 13f79535-47bb-0310-9956-ffa450edef68 --- solr/example/multicore/core0/conf/solrconfig.xml | 12 +++++++++++- solr/example/multicore/core1/conf/solrconfig.xml | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/solr/example/multicore/core0/conf/solrconfig.xml b/solr/example/multicore/core0/conf/solrconfig.xml index 18d842b718a..d29101c22cf 100644 --- a/solr/example/multicore/core0/conf/solrconfig.xml +++ b/solr/example/multicore/core0/conf/solrconfig.xml @@ -51,9 +51,19 @@ + - + + + + solrpingquery + + + all + + + solr diff --git a/solr/example/multicore/core1/conf/solrconfig.xml b/solr/example/multicore/core1/conf/solrconfig.xml index 7bc5a3ac6dd..13c59fbf400 100644 --- a/solr/example/multicore/core1/conf/solrconfig.xml +++ b/solr/example/multicore/core1/conf/solrconfig.xml @@ -51,9 +51,19 @@ + - + + + + solrpingquery + + + all + + + solr From a29a6c6f29e23ce36eadc39cc4535966d23fc4e3 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Sun, 20 May 2012 23:10:02 +0000 Subject: [PATCH 43/47] SOLR-3472: ping request handler should force distrib=false default git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340869 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/handler/PingRequestHandler.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java index 7a991c76407..4206f634e86 100644 --- a/solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java @@ -25,6 +25,7 @@ import java.util.Locale; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; @@ -173,7 +174,15 @@ public class PingRequestHandler extends RequestHandlerBase implements SolrCoreAw { SolrParams params = req.getParams(); - SolrCore core = req.getCore(); + + // in this case, we want to default distrib to false so + // we only ping the single node + Boolean distrib = params.getBool("distrib"); + if (distrib == null) { + ModifiableSolrParams mparams = new ModifiableSolrParams(params); + mparams.set("distrib", false); + req.setParams(mparams); + } String actionParam = params.get("action"); ACTIONS action = null; From aee60cb3ab36e05bb560413f7c5c66d849791f11 Mon Sep 17 00:00:00 2001 From: Stefan Matheis Date: Mon, 21 May 2012 08:37:34 +0000 Subject: [PATCH 44/47] SOLR-3459: Fix Cloud's Graph-Views for multiple collections git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340933 13f79535-47bb-0310-9956-ffa450edef68 --- solr/webapp/web/css/styles/cloud.css | 18 ++++++++++++++---- solr/webapp/web/js/scripts/cloud.js | 20 +++++++++++--------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/solr/webapp/web/css/styles/cloud.css b/solr/webapp/web/css/styles/cloud.css index 74163240d60..ef0c963045a 100644 --- 
a/solr/webapp/web/css/styles/cloud.css +++ b/solr/webapp/web/css/styles/cloud.css @@ -236,17 +236,17 @@ stroke-width: 1.5px; } -#content #graph-content .node.lvl-2 text +#content #graph-content .node.lvl-3 text { cursor: pointer; } -#content #graph-content .node.lvl-2:hover circle +#content #graph-content .node.lvl-3:hover circle { stroke: #000 !important; } -#content #graph-content .node.lvl-2:hover text +#content #graph-content .node.lvl-3:hover text { fill: #000 !important; } @@ -314,8 +314,18 @@ fill: #000; } -#content #graph-content .link.lvl-1, +#content #graph-content .link.lvl-2, #content #graph-content .link.leader { stroke: #c0c0c0; +} + +#content #graph-content .node.lvl-0 circle +{ + stroke: #fff; +} + +#content #graph-content .link.lvl-1 +{ + stroke: #fff; } \ No newline at end of file diff --git a/solr/webapp/web/js/scripts/cloud.js b/solr/webapp/web/js/scripts/cloud.js index 21b8783afb7..5aa9f2e1fc7 100644 --- a/solr/webapp/web/js/scripts/cloud.js +++ b/solr/webapp/web/js/scripts/cloud.js @@ -156,12 +156,12 @@ var helper_path_class = function( p ) var classes = [ 'link' ]; classes.push( 'lvl-' + p.target.depth ); - if( p.target.data.leader ) + if( p.target.data && p.target.data.leader ) { classes.push( 'leader' ); } - if( p.target.data.state ) + if( p.target.data && p.target.data.state ) { classes.push( p.target.data.state ); } @@ -174,12 +174,12 @@ var helper_node_class = function( d ) var classes = [ 'node' ]; classes.push( 'lvl-' + d.depth ); - if( d.data.leader ) + if( d.data && d.data.leader ) { classes.push( 'leader' ); } - if( d.data.state ) + if( d.data && d.data.state ) { classes.push( d.data.state ); } @@ -197,7 +197,7 @@ var helper_data = { var helper_node_text = function( d ) { - if( !d.data.uri ) + if( !d.data || !d.data.uri ) { return d.name; } @@ -361,7 +361,11 @@ var prepare_graph = function( graph_element, callback ) eval( 'state = ' + response.znode.data + ';' ); var leaf_count = 0; - var collections = []; + var graph_data = { + name: null, + children : [] + }; + for( var c in state ) { var shards = []; @@ -424,10 +428,8 @@ var prepare_graph = function( graph_element, callback ) }, children: shards }; - collections.push( collection ); + graph_data.children.push( collection ); } - - var graph_data = collections.shift(); helper_data.protocol = $.unique( helper_data.protocol ); helper_data.host = $.unique( helper_data.host ); From a79a14347d4ba81bc106598db904ec2b97321355 Mon Sep 17 00:00:00 2001 From: Stefan Matheis Date: Mon, 21 May 2012 08:47:39 +0000 Subject: [PATCH 45/47] SOLR-3234: Remove contrib/dataimport's webapp git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340934 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/webapp/admin/dataimport.jsp | 53 --------- .../src/webapp/admin/debug.jsp | 103 ------------------ 2 files changed, 156 deletions(-) delete mode 100644 solr/contrib/dataimporthandler/src/webapp/admin/dataimport.jsp delete mode 100644 solr/contrib/dataimporthandler/src/webapp/admin/debug.jsp diff --git a/solr/contrib/dataimporthandler/src/webapp/admin/dataimport.jsp b/solr/contrib/dataimporthandler/src/webapp/admin/dataimport.jsp deleted file mode 100644 index 667f8947d54..00000000000 --- a/solr/contrib/dataimporthandler/src/webapp/admin/dataimport.jsp +++ /dev/null @@ -1,53 +0,0 @@ -<%@ page import="org.apache.solr.request.SolrRequestHandler" %> -<%@ page import="java.util.Map" %> -<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%> -<%-- - Licensed to the Apache Software Foundation (ASF) under one or more - 
contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---%> -<%-- do a verbatim include so we can use the local vars --%> -<%@include file="_info.jsp"%> - -<% - String handler = request.getParameter("handler"); - - if (handler == null) { - Map handlers = core.getRequestHandlers(); -%> - [page titled "DataImportHandler Interactive Development" with a "Select handler:" list; surrounding markup lost in extraction] -<% } else { %> - [handler frame; markup lost in extraction] -<% } %> diff --git a/solr/contrib/dataimporthandler/src/webapp/admin/debug.jsp b/solr/contrib/dataimporthandler/src/webapp/admin/debug.jsp deleted file mode 100644 index 2c76ce728c8..00000000000 --- a/solr/contrib/dataimporthandler/src/webapp/admin/debug.jsp +++ /dev/null @@ -1,103 +0,0 @@ -<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%> -<%-- Apache License 2.0 header, identical to dataimport.jsp above --%> -<%-- do a verbatim include so we can use the local vars --%> -<%@include file="_info.jsp"%> - [page titled "DataImportHandler Interactive Development", heading "DataImportHandler Development Console"; markup lost in extraction] -<% - String handler = request.getParameter("handler"); // must be specified -%> - [console form: "Handler: <%=handler%>" with a "change handler" link; Verbose, Commit, Clean, Start Row and No. of Rows controls; a "data config xml" textarea; debug output frames; table markup lost in extraction]
- -Return to Admin Page - - From 98d15d4e6787779b2c97a0b908358b2e69c4d981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Osi=C5=84ski?= Date: Mon, 21 May 2012 08:57:31 +0000 Subject: [PATCH 46/47] SOLR-3470: Custom Carrot2 tokenizer and stemmer factories overwritten by defaults: fixing class loader issues, minor cleanups (Dawid Weiss) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340939 13f79535-47bb-0310-9956-ffa450edef68 --- solr/contrib/clustering/CHANGES.txt | 3 +- .../carrot2/CarrotClusteringEngine.java | 37 ++++++++++-- .../clustering/carrot2/CarrotParams.java | 57 ++++++++++++------- .../carrot2/CarrotClusteringEngineTest.java | 22 +++---- 4 files changed, 83 insertions(+), 36 deletions(-) diff --git a/solr/contrib/clustering/CHANGES.txt b/solr/contrib/clustering/CHANGES.txt index 1baee1dc05d..ebda2079ba3 100644 --- a/solr/contrib/clustering/CHANGES.txt +++ b/solr/contrib/clustering/CHANGES.txt @@ -9,7 +9,8 @@ CHANGES $Id$ ================== Release 4.0.0-dev ============== -(No Changes) +* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are + respected now (Stanislaw Osinski, Dawid Weiss) ================== Release 3.6.0 ================== diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java index edaffe18db5..b086318e99f 100644 --- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java +++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java @@ -109,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { private Controller controller = ControllerFactory.createPooling(); private Class clusteringAlgorithmClass; + /** Solr core we're bound to. */ + private SolrCore core; + private static class SolrResourceLocator implements IResourceLocator { private final SolrResourceLoader resourceLoader; private final String carrot2ResourcesDir; @@ -147,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { public InputStream open() throws IOException { return new ByteArrayInputStream(asBytes); } - + @Override public int hashCode() { // In case multiple resources are found they will be deduped, but we don't use it in Solr, @@ -232,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine { extractCarrotAttributes(sreq.getParams(), attributes); // Perform clustering and convert to named list - return clustersToNamedList(controller.process(attributes, - clusteringAlgorithmClass).getClusters(), sreq.getParams()); + // Carrot2 uses current thread's context class loader to get + // certain classes (e.g. custom tokenizer/stemmer) at runtime. + // To make sure classes from contrib JARs are available, + // we swap the context class loader for the time of clustering. 
+      Thread ct = Thread.currentThread();
+      ClassLoader prev = ct.getContextClassLoader();
+      try {
+        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+        return clustersToNamedList(controller.process(attributes,
+            clusteringAlgorithmClass).getClusters(), sreq.getParams());
+      } finally {
+        ct.setContextClassLoader(prev);
+      }
     } catch (Exception e) {
       log.error("Carrot2 clustering failed", e);
       throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
@@ -243,6 +257,8 @@
   @Override
   @SuppressWarnings({ "unchecked", "rawtypes" })
   public String init(NamedList config, final SolrCore core) {
+    this.core = core;
+
     String result = super.init(config, core);
 
     final SolrParams initParams = SolrParams.toSolrParams(config);
@@ -277,8 +293,19 @@
           // Using the class loader directly because this time we want to omit the prefix
           new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
 
-    this.controller.init(initAttributes);
-
+    // Carrot2 uses current thread's context class loader to get
+    // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
+    // To make sure classes from contrib JARs are available,
+    // we swap the context class loader for the time of initialization.
+    Thread ct = Thread.currentThread();
+    ClassLoader prev = ct.getContextClassLoader();
+    try {
+      ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+      this.controller.init(initAttributes);
+    } finally {
+      ct.setContextClassLoader(prev);
+    }
+
     SchemaField uniqueField = core.getSchema().getUniqueKeyField();
     if (uniqueField == null) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
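Both hunks above apply the same discipline: save the thread's context class loader, point it at the SolrResourceLoader's class loader for the duration of the call into Carrot2, and restore it in a finally block. As a standalone illustration, here is a minimal sketch of that pattern; the helper class and the withContextClassLoader name are illustrative only and are not part of this patch, which inlines the pattern instead:

import java.util.concurrent.Callable;

// Illustrative save/set/restore of the thread context class loader.
final class ContextClassLoaderSwap {
  static <T> T withContextClassLoader(ClassLoader cl, Callable<T> body) throws Exception {
    Thread current = Thread.currentThread();
    ClassLoader previous = current.getContextClassLoader();
    current.setContextClassLoader(cl); // make contrib/plugin classes visible to the callee
    try {
      return body.call();
    } finally {
      current.setContextClassLoader(previous); // always restore, even if body throws
    }
  }
}

The finally-based restore matters because servlet containers pool request threads; a leaked class loader would silently affect unrelated requests later handled by the same thread.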
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
index 00050476a56..f9d7b757cb1 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
@@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet;
  */
-public interface CarrotParams {
+/**
+ * Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
+ */
+public final class CarrotParams {
 
-  String CARROT_PREFIX = "carrot.";
+  private static String CARROT_PREFIX = "carrot.";
 
-  String ALGORITHM = CARROT_PREFIX + "algorithm";
+  public static String ALGORITHM = CARROT_PREFIX + "algorithm";
 
-  String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
-  String URL_FIELD_NAME = CARROT_PREFIX + "url";
-  String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
-  String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
-  String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
+  public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
+  public static String URL_FIELD_NAME = CARROT_PREFIX + "url";
+  public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
+  public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
+  public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
 
-  String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
-  String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
-  String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
+  public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
+  public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
+  public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
 
-  String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
-  String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
-  String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
-  String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
+  public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
+  public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+  public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+  public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
 
-  public static final Set CARROT_PARAM_NAMES = ImmutableSet.of(
-      ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME,
-      PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
-      LEXICAL_RESOURCES_DIR);
+  static final Set CARROT_PARAM_NAMES = ImmutableSet.of(
+      ALGORITHM,
+
+      TITLE_FIELD_NAME,
+      URL_FIELD_NAME,
+      SNIPPET_FIELD_NAME,
+      LANGUAGE_FIELD_NAME,
+      CUSTOM_FIELD_NAME,
+
+      PRODUCE_SUMMARY,
+      SUMMARY_FRAGSIZE,
+      SUMMARY_SNIPPETS,
+
+      NUM_DESCRIPTIONS,
+      OUTPUT_SUB_CLUSTERS,
+      LEXICAL_RESOURCES_DIR,
+      LANGUAGE_CODE_MAP);
 
+  /** No instances. */
+  private CarrotParams() {}
 }
diff --git a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
index 911f03d787d..94502350bcd 100644
--- a/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
+++ b/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
@@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
   }
 
   @Test
-  public void solrStopWordsUsedInCarrot2Clustering() throws Exception {
+  public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set("merge-resources", false);
     params.set(AttributeUtils.getKey(
@@ -220,7 +220,7 @@
   }
 
   @Test
-  public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
+  public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
     ModifiableSolrParams params = new ModifiableSolrParams();
     // Force string fields to be used for clustering. Does not make sense
     // in the real world, but does the job in the test.
@@ -239,7 +239,7 @@
   }
 
   @Test
-  public void highlightingOfMultiValueField() throws Exception {
+  public void testHighlightingOfMultiValueField() throws Exception {
     final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
         false, 30, 3, "multi", 1).get(0)).get(1);
     assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
@@ -256,7 +256,7 @@
   }
 
   @Test
-  public void concatenatingMultipleFields() throws Exception {
+  public void testConcatenatingMultipleFields() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading");
     params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body");
@@ -271,7 +271,7 @@
   }
 
   @Test
-  public void highlightingMultipleFields() throws Exception {
+  public void testHighlightingMultipleFields() throws Exception {
     final TermQuery query = new TermQuery(new Term("snippet", "content"));
 
     final ModifiableSolrParams params = new ModifiableSolrParams();
@@ -297,7 +297,7 @@
   }
 
   @Test
-  public void oneCarrot2SupportedLanguage() throws Exception {
+  public void testOneCarrot2SupportedLanguage() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
 
@@ -309,7 +309,7 @@
   }
 
   @Test
-  public void oneCarrot2SupportedLanguageOfMany() throws Exception {
+  public void testOneCarrot2SupportedLanguageOfMany() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
 
@@ -321,7 +321,7 @@
   }
 
   @Test
-  public void languageCodeMapping() throws Exception {
+  public void testLanguageCodeMapping() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
     params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl");
@@ -334,7 +334,7 @@
   }
 
   @Test
-  public void passingOfCustomFields() throws Exception {
+  public void testPassingOfCustomFields() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield");
     params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield");
@@ -353,7 +353,7 @@
   }
 
   @Test
-  public void customTokenizer() throws Exception {
+  public void testCustomTokenizer() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.TITLE_FIELD_NAME, "title");
     params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
@@ -367,7 +367,7 @@
   }
 
   @Test
-  public void customStemmer() throws Exception {
+  public void testCustomStemmer() throws Exception {
     final ModifiableSolrParams params = new ModifiableSolrParams();
     params.add(CarrotParams.TITLE_FIELD_NAME, "title");
     params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
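To close out this commit: the CarrotParams constants above are what request-side code passes to drive clustering. A minimal, hypothetical sketch of assembling such a request, built only from parameters that appear in the tests above (field names and values are illustrative, not prescribed by the patch):

import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.handler.clustering.carrot2.CarrotParams;

// Hypothetical clustering-request parameters, mirroring the tests above.
public final class ClusteringRequestExample {
  public static ModifiableSolrParams clusteringParams() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(CarrotParams.TITLE_FIELD_NAME, "title");      // field holding document titles
    params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");  // text that gets clustered
    params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");    // per-document language field
    params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl"); // map custom codes to ISO codes
    return params;
  }
}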
From 20f10be75b469a4ca5817815b99098eb44c60cb6 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Mon, 21 May 2012 11:07:01 +0000
Subject: [PATCH 47/47] LUCENE-4068: Improve IW#addDocuments(...) javadoc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1340966 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/lucene/index/IndexWriter.java | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 28d16a8ef4f..d229c71d39a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1041,12 +1041,19 @@
    *
    * <p><b>WARNING</b>: the index does not currently record
    * which documents were added as a block. Today this is
-   * fine, because merging will preserve the block (as long
-   * as none of them were deleted). But it's possible in the
-   * future that Lucene may more aggressively re-order
-   * documents (for example, perhaps to obtain better index
-   * compression), in which case you may need to fully
-   * re-index your documents at that time.
+   * fine, because merging will preserve a block. The order of
+   * documents within a segment will be preserved, even when child
+   * documents within a block are deleted. Most search features
+   * (like result grouping and block joining) require you to
+   * mark documents; when these documents are deleted these
+   * search features will not work as expected. Obviously adding
+   * documents to an existing block will require you to reindex
+   * the entire block.
+   *
+   * <p>However it's possible that in the future Lucene may
+   * more aggressively re-order documents (for example, perhaps
+   * to obtain better index compression), in which case you may
+   * need to fully re-index your documents at that time.
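The contract spelled out in the added paragraphs is easier to see in code. Below is a minimal, hypothetical sketch of indexing one parent/child block with addDocuments() against the Lucene 4.x field API; the docType marker field and all names are illustrative application conventions, not Lucene features, and the sketch is not part of this patch:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;

final class BlockIndexingExample {
  // Index a resume (parent) and its skills (children) as one block.
  static void addResumeBlock(IndexWriter writer) throws IOException {
    List<Document> block = new ArrayList<Document>();

    Document skill = new Document();
    skill.add(new TextField("skill", "java", Store.YES));
    block.add(skill); // child documents first

    Document resume = new Document();
    resume.add(new StringField("docType", "resume", Store.NO)); // parent marker
    resume.add(new TextField("name", "Jane Doe", Store.YES));
    block.add(resume); // by convention the parent comes last in the block

    // The block is added atomically and contiguously, but the index does not
    // record the grouping: updating any document later means re-adding the
    // whole block, as the warning above explains.
    writer.addDocuments(block);
  }
}

Block-join style search then relies on a marker like docType to tell parents from children, which is exactly why deleting individual documents inside a block makes those features misbehave.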
    *
    * <p>See {@link #addDocument(Iterable)} for details on
    * index and IndexWriter state after an Exception, and