diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 17ea33945f8..dce825156f8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -266,6 +266,11 @@ Changes in backwards compatibility policy
 * LUCENE-3970: Rename Fields.getUniqueFieldCount -> .size() and
   Terms.getUniqueTermCount -> .size(). (Iulius Curt via Mike McCandless)
 
+* LUCENE-3514: IndexSearcher.setDefaultFieldSortScoring was removed
+  and replaced with per-search control via new expert search methods
+  that take two booleans indicating whether hit scores and max
+  score should be computed. (Mike McCandless)
+
 Changes in Runtime Behavior
 
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@@ -508,6 +513,9 @@ API Changes
   immutable instances of NormalizeCharMap. (Dawid Weiss, Mike McCandless)
 
+* LUCENE-4063: FrenchLightStemmer no longer deletes repeated digits.
+  (Tanguy Moal via Steve Rowe)
+
 New features
 
 * LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
@@ -857,6 +865,10 @@ New features
 * LUCENE-4039: Add AddIndexesTask to benchmark, which uses IW.addIndexes.
   (Shai Erera)
 
+* LUCENE-3514: Added IndexSearcher.searchAfter when Sort is used,
+  returning results after a specified FieldDoc for deep
+  paging. (Mike McCandless)
+
 Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
@@ -905,7 +917,11 @@ Optimizations
   FST under the hood, which requires less RAM. NormalizeCharMap no
   longer accepts empty string match (it did previously, but ignored
   it). (Dawid Weiss, Mike McCandless)
- 
+
+* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
+  and a few general improvements to DirectoryTaxonomyWriter.
+  (Shai Erera, Gilad Barkai)
+
 Bug fixes
 
 * LUCENE-2803: The FieldCache can miss values if an entry for a reader
@@ -953,6 +969,11 @@ Bug fixes
   offset calculation in PathHierarchyTokenizer.
   (Mike McCandless, Uwe Schindler, Robert Muir)
 
+* LUCENE-4060: Fix a synchronization bug in
+  DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to
+  addTaxonomy and now takes only one Directory and one OrdinalMap.
+  (Shai Erera, Gilad Barkai)
+
 Documentation
 
 * LUCENE-3958: Javadocs corrections for IndexWriter.
@@ -990,6 +1011,10 @@ Build
 * LUCENE-3286: Moved remainder of contrib/xml-query-parser to lucene/queryparser.
   Classes now found at org.apache.lucene.queryparser.xml.*
 
+* LUCENE-4059: Improve ANT task prepare-webpages (used by documentation
+  tasks) to correctly encode build file names as URIs for later processing by
+  XSL.
+  (Greg Bowyer, Uwe Schindler)
+
 ======================= Lucene 3.6.0 =======================
 
 Changes in backwards compatibility policy
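[Usage sketch, not part of the patch: the two LUCENE-3514 entries above only name the new API. A minimal deep-paging loop against it might look as follows, assuming an open IndexReader `reader` and a Query `query`; the field name and type are illustrative, and the two-boolean expert variants (hit scores / max score) mentioned in the first entry are orthogonal to this loop.]

    import org.apache.lucene.search.*;

    IndexSearcher searcher = new IndexSearcher(reader);
    Sort sort = new Sort(new SortField("date", SortField.Type.LONG));
    TopDocs page = searcher.search(query, 10, sort);
    while (page.scoreDocs.length > 0) {
      // the last hit of one page becomes the "after" anchor for the next
      FieldDoc after = (FieldDoc) page.scoreDocs[page.scoreDocs.length - 1];
      page = searcher.searchAfter(after, query, 10, sort);
    }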
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
index b4f73a61411..a32e1480828 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
@@ -62,8 +62,16 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
                      | "weierp" | "xi" | "yacute" | "yen" | "yuml" | "zeta"
                      | "zwj" | "zwnj" )
 %{
-  private static final Set<String> upperCaseVariantsAccepted
-      = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
+  private static final Map<String,String> upperCaseVariantsAccepted
+      = new HashMap<String,String>();
+  static {
+    upperCaseVariantsAccepted.put("quot", "QUOT");
+    upperCaseVariantsAccepted.put("copy", "COPY");
+    upperCaseVariantsAccepted.put("gt", "GT");
+    upperCaseVariantsAccepted.put("lt", "LT");
+    upperCaseVariantsAccepted.put("reg", "REG");
+    upperCaseVariantsAccepted.put("amp", "AMP");
+  }
   private static final CharArrayMap<Character> entityValues
       = new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
   static {
@@ -145,8 +153,9 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
     for (int i = 0 ; i < entities.length ; i += 2) {
       Character value = entities[i + 1].charAt(0);
       entityValues.put(entities[i], value);
-      if (upperCaseVariantsAccepted.contains(entities[i])) {
-        entityValues.put(entities[i].toUpperCase(), value);
+      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
+      if (upperCaseVariant != null) {
+        entityValues.put(upperCaseVariant, value);
       }
     }
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index 949110284e9..2b83aa017e4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/24/12 4:50 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */
 
 package org.apache.lucene.analysis.charfilter;
 
@@ -21,7 +21,8 @@ package org.apache.lucene.analysis.charfilter;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.util.Version;
@@ -39,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
 /**
  * This class is a scanner generated by
  * JFlex 1.5.0-SNAPSHOT
- * on 3/24/12 4:50 PM from the specification file
- * C:/cygwin/home/s/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+ * on 5/18/12 12:24 PM from the specification file
+ * C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
  */
 public final class HTMLStripCharFilter extends BaseCharFilter {
@@ -30522,8 +30523,16 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
   private boolean zzEOFDone;
 
   /* user code: */
-  private static final Set<String> upperCaseVariantsAccepted
-      = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
+  private static final Map<String,String> upperCaseVariantsAccepted
+      = new HashMap<String,String>();
+  static {
+    upperCaseVariantsAccepted.put("quot", "QUOT");
+    upperCaseVariantsAccepted.put("copy", "COPY");
+    upperCaseVariantsAccepted.put("gt", "GT");
+    upperCaseVariantsAccepted.put("lt", "LT");
+    upperCaseVariantsAccepted.put("reg", "REG");
+    upperCaseVariantsAccepted.put("amp", "AMP");
+  }
   private static final CharArrayMap<Character> entityValues
       = new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
   static {
@@ -30605,8 +30614,9 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
     for (int i = 0 ; i < entities.length ; i += 2) {
       Character value = entities[i + 1].charAt(0);
       entityValues.put(entities[i], value);
-      if (upperCaseVariantsAccepted.contains(entities[i])) {
-        entityValues.put(entities[i].toUpperCase(), value);
+      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
+      if (upperCaseVariant != null) {
+        entityValues.put(upperCaseVariant, value);
       }
     }
   }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
index 54d62cc9372..3c9116b8440 100755
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@@ -19,7 +19,8 @@ package org.apache.lucene.analysis.charfilter;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.util.Version;
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py
index b3300687556..ff9ee6bf3a1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/htmlentity.py
@@ -50,8 +50,16 @@ def main():
   print output_line, ')'
 
   print '%{'
-  print '  private static final Set<String> upperCaseVariantsAccepted'
-  print '      = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));'
+  print '  private static final Map<String,String> upperCaseVariantsAccepted'
+  print '      = new HashMap<String,String>();'
+  print '  static {'
+  print '    upperCaseVariantsAccepted.put("quot", "QUOT");'
+  print '    upperCaseVariantsAccepted.put("copy", "COPY");'
+  print '    upperCaseVariantsAccepted.put("gt", "GT");'
+  print '    upperCaseVariantsAccepted.put("lt", "LT");'
+  print '    upperCaseVariantsAccepted.put("reg", "REG");'
+  print '    upperCaseVariantsAccepted.put("amp", "AMP");'
+  print '  }'
   print '  private static final CharArrayMap<Character> entityValues'
   print '      = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys)
   print '  static {'
@@ -68,8 +76,9 @@ def main():
   print '    for (int i = 0 ; i < entities.length ; i += 2) {'
   print '      Character value = entities[i + 1].charAt(0);'
   print '      entityValues.put(entities[i], value);'
-  print '      if (upperCaseVariantsAccepted.contains(entities[i])) {'
-  print '        entityValues.put(entities[i].toUpperCase(), value);'
+  print '      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);'
+  print '      if (upperCaseVariant != null) {'
+  print '        entityValues.put(upperCaseVariant, value);'
   print '      }'
   print '    }'
   print "  }"
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
index 43e2e0625d1..5be44bc032e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
@@ -246,7 +246,7 @@ public class FrenchLightStemmer {
     char ch = s[0];
     for (int i = 1; i < len; i++) {
-      if (s[i] == ch)
+      if (s[i] == ch && Character.isLetter(ch))
         len = delete(s, i--, len);
       else
         ch = s[i];
@@ -260,7 +260,7 @@ public class FrenchLightStemmer {
       if (s[len-1] == 'r') len--;
       if (s[len-1] == 'e') len--;
       if (s[len-1] == 'e') len--;
-      if (s[len-1] == s[len-2]) len--;
+      if (s[len-1] == s[len-2] && Character.isLetter(s[len-1])) len--;
     }
     return len;
   }
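[Behavior sketch, not part of the patch: assuming the stem(char[], int) entry point shared by Lucene's light stemmers, which returns the new valid length of the buffer.]

    FrenchLightStemmer stemmer = new FrenchLightStemmer();
    char[] buffer = "22hh00".toCharArray();
    int len = stemmer.stem(buffer, buffer.length);
    // repeated letters are still collapsed; repeated digits are now kept:
    System.out.println(new String(buffer, 0, len)); // "22h00", per the new test below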
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
index 6012e8282a9..d0f7af15c24 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
@@ -153,6 +153,22 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
 
     checkOneTerm(analyzer, "disposition", "dispos");
     checkOneTerm(analyzer, "dispose", "dispos");
+
+    // SOLR-3463 : abusive compression of repeated characters in numbers
+    // Trailing repeated char elision :
+    checkOneTerm(analyzer, "1234555", "1234555");
+    // Repeated char within numbers with more than 4 characters :
+    checkOneTerm(analyzer, "12333345", "12333345");
+    // Short numbers weren't affected already:
+    checkOneTerm(analyzer, "1234", "1234");
+    // Ensure behaviour is preserved for words!
+    // Trailing repeated char elision :
+    checkOneTerm(analyzer, "abcdeff", "abcdef");
+    // Repeated char within words with more than 4 characters :
+    checkOneTerm(analyzer, "abcccddeef", "abcdef");
+    checkOneTerm(analyzer, "créées", "cre");
+    // Combined letter and digit repetition
+    checkOneTerm(analyzer, "22hh00", "22h00"); // 10:00pm
   }
 
   /** Test against a vocabulary from the reference impl */
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java
index c83de194d7a..977ab49f0e8 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java
@@ -252,6 +252,8 @@ public class ToStringUtil {
   /**
    * Romanize katakana with modified hepburn
    */
+  // TODO: now that this is used by readingsfilter and not just for
+  // debugging, fix this to really be a scheme that works best with IMEs
   public static void getRomanization(Appendable builder, CharSequence s) throws IOException {
     final int len = s.length();
     for (int i = 0; i < len; i++) {
@@ -522,6 +524,9 @@ public class ToStringUtil {
         if (ch2 == 'ウ') {
           builder.append("tō");
           i++;
+        } else if (ch2 == 'ゥ') {
+          builder.append("tu");
+          i++;
         } else {
           builder.append("to");
         }
@@ -665,7 +670,7 @@ public class ToStringUtil {
         builder.append("mu");
         break;
       case 'メ':
-        builder.append("mi");
+        builder.append("me");
         break;
       case 'モ':
         if (ch2 == 'ウ') {
@@ -690,7 +695,12 @@ public class ToStringUtil {
         }
         break;
       case 'ラ':
-        builder.append("ra");
+        if (ch2 == '゜') {
+          builder.append("la");
+          i++;
+        } else {
+          builder.append("ra");
+        }
         break;
       case 'リ':
         if (ch2 == 'ョ' && ch3 == 'ウ') {
@@ -711,20 +721,36 @@ public class ToStringUtil {
         } else if (ch2 == 'ェ') {
           builder.append("rye");
           i++;
+        } else if (ch2 == '゜') {
+          builder.append("li");
+          i++;
         } else {
           builder.append("ri");
         }
         break;
       case 'ル':
-        builder.append("ru");
+        if (ch2 == '゜') {
+          builder.append("lu");
+          i++;
+        } else {
+          builder.append("ru");
+        }
         break;
       case 'レ':
-        builder.append("re");
+        if (ch2 == '゜') {
+          builder.append("le");
+          i++;
+        } else {
+          builder.append("re");
+        }
         break;
       case 'ロ':
         if (ch2 == 'ウ') {
           builder.append("rō");
           i++;
+        } else if (ch2 == '゜') {
+          builder.append("lo");
+          i++;
         } else {
           builder.append("ro");
         }
@@ -887,7 +913,28 @@ public class ToStringUtil {
         builder.append("da");
         break;
       case 'ヂ':
-        builder.append("ji");
+        // TODO: investigate all this
+        if (ch2 == 'ョ' && ch3 == 'ウ') {
+          builder.append("jō");
+          i += 2;
+        } else if (ch2 == 'ュ' && ch3 == 'ウ') {
+          builder.append("jū");
+          i += 2;
+        } else if (ch2 == 'ャ') {
+          builder.append("ja");
+          i++;
+        } else if (ch2 == 'ョ') {
+          builder.append("jo");
+          i++;
+        } else if (ch2 == 'ュ') {
+          builder.append("ju");
+          i++;
+        } else if (ch2 == 'ェ') {
+          builder.append("je");
+          i++;
+        } else {
+          builder.append("ji");
+        }
         break;
       case 'ヅ':
         builder.append("zu");
@@ -994,6 +1041,18 @@ public class ToStringUtil {
           builder.append("po");
         }
         break;
+      case 'ヷ':
+        builder.append("va");
+        break;
+      case 'ヸ':
+        builder.append("vi");
+        break;
+      case 'ヹ':
+        builder.append("ve");
+        break;
+      case 'ヺ':
+        builder.append("vo");
+        break;
       case 'ヴ':
         if (ch2 == 'ィ' && ch3 == 'ェ') {
           builder.append("vye");
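[Usage sketch, not part of the patch: the String-returning getRomanization overload already exercised by the tests below. The first two lines exercise mappings added in this patch.]

    System.out.println(ToStringUtil.getRomanization("ラ゜")); // "la" (new handakuten mapping)
    System.out.println(ToStringUtil.getRomanization("ヷ"));   // "va" (archaic kana, new)
    System.out.println(ToStringUtil.getRomanization("メ"));   // "me" (previously, incorrectly, "mi")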
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java
index f95a527dcb9..a2388d7c03c 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/util/TestToStringUtil.java
@@ -17,6 +17,9 @@ package org.apache.lucene.analysis.ja.util;
  * limitations under the License.
  */
 
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestToStringUtil extends LuceneTestCase {
@@ -31,4 +34,79 @@ public class TestToStringUtil extends LuceneTestCase {
     assertEquals("chashu", ToStringUtil.getRomanization("チャーシュー"));
     assertEquals("shumai", ToStringUtil.getRomanization("シューマイ"));
   }
+
+  // see http://en.wikipedia.org/wiki/Hepburn_romanization,
+  // but this isnt even thorough or really probably what we want!
+  public void testHepburnTable() {
+    Map<String,String> table = new HashMap<String,String>() {{
+      put("ア", "a"); put("イ", "i"); put("ウ", "u"); put("エ", "e"); put("オ", "o");
+      put("カ", "ka"); put("キ", "ki"); put("ク", "ku"); put("ケ", "ke"); put("コ", "ko");
+      put("サ", "sa"); put("シ", "shi"); put("ス", "su"); put("セ", "se"); put("ソ", "so");
+      put("タ", "ta"); put("チ", "chi"); put("ツ", "tsu"); put("テ", "te"); put("ト", "to");
+      put("ナ", "na"); put("ニ", "ni"); put("ヌ", "nu"); put("ネ", "ne"); put("ノ", "no");
+      put("ハ", "ha"); put("ヒ", "hi"); put("フ", "fu"); put("ヘ", "he"); put("ホ", "ho");
+      put("マ", "ma"); put("ミ", "mi"); put("ム", "mu"); put("メ", "me"); put("モ", "mo");
+      put("ヤ", "ya"); put("ユ", "yu"); put("ヨ", "yo");
+      put("ラ", "ra"); put("リ", "ri"); put("ル", "ru"); put("レ", "re"); put("ロ", "ro");
+      put("ワ", "wa"); put("ヰ", "i"); put("ヱ", "e"); put("ヲ", "o");
+      put("ン", "n");
+      put("ガ", "ga"); put("ギ", "gi"); put("グ", "gu"); put("ゲ", "ge"); put("ゴ", "go");
+      put("ザ", "za"); put("ジ", "ji"); put("ズ", "zu"); put("ゼ", "ze"); put("ゾ", "zo");
+      put("ダ", "da"); put("ヂ", "ji"); put("ヅ", "zu"); put("デ", "de"); put("ド", "do");
+      put("バ", "ba"); put("ビ", "bi"); put("ブ", "bu"); put("ベ", "be"); put("ボ", "bo");
+      put("パ", "pa"); put("ピ", "pi"); put("プ", "pu"); put("ペ", "pe"); put("ポ", "po");
+
+      put("キャ", "kya"); put("キュ", "kyu"); put("キョ", "kyo");
+      put("シャ", "sha"); put("シュ", "shu"); put("ショ", "sho");
+      put("チャ", "cha"); put("チュ", "chu"); put("チョ", "cho");
+      put("ニャ", "nya"); put("ニュ", "nyu"); put("ニョ", "nyo");
+      put("ヒャ", "hya"); put("ヒュ", "hyu"); put("ヒョ", "hyo");
+      put("ミャ", "mya"); put("ミュ", "myu"); put("ミョ", "myo");
+      put("リャ", "rya"); put("リュ", "ryu"); put("リョ", "ryo");
+      put("ギャ", "gya"); put("ギュ", "gyu"); put("ギョ", "gyo");
+      put("ジャ", "ja"); put("ジュ", "ju"); put("ジョ", "jo");
+      put("ヂャ", "ja"); put("ヂュ", "ju"); put("ヂョ", "jo");
+      put("ビャ", "bya"); put("ビュ", "byu"); put("ビョ", "byo");
+      put("ピャ", "pya"); put("ピュ", "pyu"); put("ピョ", "pyo");
+
+      put("イィ", "yi"); put("イェ", "ye");
+      put("ウァ", "wa"); put("ウィ", "wi"); put("ウゥ", "wu"); put("ウェ", "we"); put("ウォ", "wo");
+      put("ウュ", "wyu");
+      // TODO: really should be vu
+      put("ヴァ", "va"); put("ヴィ", "vi"); put("ヴ", "v"); put("ヴェ", "ve"); put("ヴォ", "vo");
+      put("ヴャ", "vya"); put("ヴュ", "vyu"); put("ヴィェ", "vye"); put("ヴョ", "vyo");
+      put("キェ", "kye");
+      put("ギェ", "gye");
+      put("クァ", "kwa"); put("クィ", "kwi"); put("クェ", "kwe"); put("クォ", "kwo");
+      put("クヮ", "kwa");
+      put("グァ", "gwa"); put("グィ", "gwi"); put("グェ", "gwe"); put("グォ", "gwo");
+      put("グヮ", "gwa");
+      put("シェ", "she");
+      put("ジェ", "je");
+      put("スィ", "si");
+      put("ズィ", "zi");
+      put("チェ", "che");
+      put("ツァ", "tsa"); put("ツィ", "tsi"); put("ツェ", "tse"); put("ツォ", "tso");
+      put("ツュ", "tsyu");
+      put("ティ", "ti"); put("トゥ", "tu");
+      put("テュ", "tyu");
+      put("ディ", "di"); put("ドゥ", "du");
put("デュ", "dyu"); + put("ニェ", "nye"); + put("ヒェ", "hye"); + put("ビェ", "bye"); + put("ピェ", "pye"); + put("ファ", "fa"); put("フィ", "fi"); put("フェ", "fe"); put("フォ", "fo"); + put("フャ", "fya"); put("フュ", "fyu"); put("フィェ", "fye"); put("フョ", "fyo"); + put("ホゥ", "hu"); + put("ミェ", "mye"); + put("リェ", "rye"); + put("ラ゜", "la"); put("リ゜", "li"); put("ル゜", "lu"); put("レ゜", "le"); put("ロ゜", "lo"); + put("ヷ", "va"); put("ヸ", "vi"); put("ヹ", "ve"); put("ヺ", "vo"); + }}; + + for (String s : table.keySet()) { + assertEquals(s, table.get(s), ToStringUtil.getRomanization(s)); + } + } } diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java index 44ff891cc3f..fc60959ee9e 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java @@ -25,20 +25,20 @@ import java.util.HashMap; import java.util.Locale; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.benchmark.byTask.feeds.ContentSource; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.FacetSource; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.benchmark.byTask.stats.Points; +import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; import org.apache.lucene.benchmark.byTask.tasks.PerfTask; import org.apache.lucene.benchmark.byTask.tasks.ReadTask; import org.apache.lucene.benchmark.byTask.tasks.SearchTask; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.FileUtils; -import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; @@ -80,6 +80,7 @@ public class PerfRunData implements Closeable { private Directory directory; private Analyzer analyzer; private DocMaker docMaker; + private ContentSource contentSource; private FacetSource facetSource; private Locale locale; @@ -105,10 +106,16 @@ public class PerfRunData implements Closeable { // analyzer (default is standard analyzer) analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer")); + + // content source + String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource"); + contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance(); + contentSource.setConfig(config); + // doc maker docMaker = Class.forName(config.get("doc.maker", "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); - docMaker.setConfig(config); + docMaker.setConfig(config, contentSource); // facet source facetSource = Class.forName(config.get("facet.source", "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); @@ -129,10 +136,11 @@ public class PerfRunData implements Closeable { } } + @Override public void close() throws IOException { IOUtils.close(indexWriter, indexReader, directory, taxonomyWriter, taxonomyReader, taxonomyDir, - docMaker, facetSource); + docMaker, 
+                  docMaker, facetSource, contentSource);
 
     // close all perf objects that are closeable.
     ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
@@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
     this.analyzer = analyzer;
   }
 
-  /** Returns the docMaker. */
+  /** Returns the ContentSource. */
+  public ContentSource getContentSource() {
+    return contentSource;
+  }
+
+  /** Returns the DocMaker. */
   public DocMaker getDocMaker() {
     return docMaker;
   }
@@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
   }
 
   public void resetInputs() throws IOException {
+    contentSource.resetInputs();
    docMaker.resetInputs();
     facetSource.resetInputs();
     for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
index 36738c2d8d4..51dd9ba31e1 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
@@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
    * items generated since the last reset, so it's important to call
    * super.resetInputs in case you override this method.
    */
-  @SuppressWarnings("unused")
   public void resetInputs() throws IOException {
     bytesCount = 0;
     itemCount = 0;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
index 64fc3e103b2..a7b263dc6a7 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
    * {@link ContentSource}, and it can be overridden to do more work (but make
    * sure to call super.close()).
    */
+  @Override
   public void close() throws IOException {
     source.close();
   }
 
-  /**
-   * Returns the number of bytes generated by the content source since last
-   * reset.
-   */
-  public synchronized long getBytesCount() {
-    return source.getBytesCount();
-  }
-
-  /**
-   * Returns the total number of bytes that were generated by the content source
-   * defined to that doc maker.
-   */
-  public long getTotalBytesCount() {
-    return source.getTotalBytesCount();
-  }
-
   /**
    * Creates a {@link Document} object ready for indexing. This method uses the
    * {@link ContentSource} to get the next document from the source, and creates
@@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
   public synchronized void resetInputs() throws IOException {
     source.printStatistics("docs");
     // re-initiate since properties by round may have changed.
-    setConfig(config);
+    setConfig(config, source);
     source.resetInputs();
     numDocsCreated.set(0);
     resetLeftovers();
   }
 
   /** Set the configuration parameters of this doc maker. */
-  public void setConfig(Config config) {
+  public void setConfig(Config config, ContentSource source) {
     this.config = config;
-    try {
-      if (source != null) {
-        source.close();
-      }
-      String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
-      source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
-      source.setConfig(config);
-    } catch (Exception e) {
-      // Should not get here. Throw runtime exception.
-      throw new RuntimeException(e);
-    }
+    this.source = source;
 
     boolean stored = config.get("doc.stored", false);
     boolean bodyStored = config.get("doc.body.stored", stored);
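[Wiring sketch, not part of the patch: the ContentSource is no longer constructed inside DocMaker.setConfig; callers create and configure it themselves, then hand it over. Property values are illustrative; the pattern mirrors the ExtractWikipedia and DocMakerTest changes below.]

    import java.util.Properties;
    import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
    import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
    import org.apache.lucene.benchmark.byTask.feeds.LineDocSource;
    import org.apache.lucene.benchmark.byTask.utils.Config;

    Properties props = new Properties();
    props.setProperty("docs.file", "/path/to/docs.txt"); // illustrative
    props.setProperty("content.source.forever", "false");
    Config config = new Config(props);

    ContentSource source = new LineDocSource(); // any ContentSource
    source.setConfig(config);

    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source); // was: docMaker.setConfig(config)
    docMaker.resetInputs();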
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ConsumeContentSourceTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ConsumeContentSourceTask.java
index 5dbed927ecc..53cc6ca1052 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ConsumeContentSourceTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ConsumeContentSourceTask.java
@@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
 import org.apache.lucene.benchmark.byTask.feeds.DocData;
-import org.apache.lucene.benchmark.byTask.utils.Config;
 
-/**
- * Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
- * Supports the following parameters:
- * <ul>
- * <li>content.source - the content source to use. (mandatory)
- * </ul>
- */
+/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
 public class ConsumeContentSourceTask extends PerfTask {
 
-  private ContentSource source;
-  private DocData dd = new DocData();
+  private final ContentSource source;
+  private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
 
   public ConsumeContentSourceTask(PerfRunData runData) {
     super(runData);
-    Config config = runData.getConfig();
-    String sourceClass = config.get("content.source", null);
-    if (sourceClass == null) {
-      throw new IllegalArgumentException("content.source must be defined");
-    }
-    try {
-      source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
-      source.setConfig(config);
-      source.resetInputs();
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
+    source = runData.getContentSource();
   }
 
   @Override
@@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
     return "read " + recsCount + " documents from the content source";
   }
 
-  @Override
-  public void close() throws Exception {
-    source.close();
-    super.close();
-  }
-
   @Override
   public int doLogic() throws Exception {
-    dd = source.getNextDocData(dd);
+    dd.set(source.getNextDocData(dd.get()));
     return 1;
   }
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
index feeb6dac2b6..56527342f06 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
@@ -22,7 +22,9 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.util.Properties;
 
+import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
 import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.document.Document;
@@ -122,15 +124,19 @@ public class ExtractWikipedia {
       } else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
         keepImageOnlyDocs = false;
       }
     }
-    DocMaker docMaker = new DocMaker();
+
     Properties properties = new Properties();
-    properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
     properties.setProperty("docs.file", wikipedia.getAbsolutePath());
     properties.setProperty("content.source.forever", "false");
     properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
-    docMaker.setConfig(new Config(properties));
+    Config config = new Config(properties);
+
+    ContentSource source = new EnwikiContentSource();
+    source.setConfig(config);
+
+    DocMaker docMaker = new DocMaker();
+    docMaker.setConfig(config, source);
     docMaker.resetInputs();
     if (wikipedia.exists()) {
       System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
index 47133b01708..dc1c78fa87f 100644
--- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
+++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
@@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
 import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
 import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
-import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
 import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.document.Document;
@@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
 /** Tests the functionality of {@link DocMaker}. */
 public class DocMakerTest extends BenchmarkTestCase {
 
-  static final class OneDocSource extends ContentSource {
+  public static final class OneDocSource extends ContentSource {
 
     private boolean finish = false;
 
@@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
 
     // Indexing configuration.
     props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
-    props.setProperty("content.source", OneDocSource.class.getName());
     props.setProperty("directory", "RAMDirectory");
     if (setNormsProp) {
      props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
@@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
     Config config = new Config(props);
 
     DocMaker dm = new DocMaker();
-    dm.setConfig(config);
+    dm.setConfig(config, new OneDocSource());
     return dm.makeDocument();
   }
 
@@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
     ps.close();
 
     Properties props = new Properties();
-    props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
     props.setProperty("docs.file", f.getAbsolutePath());
     props.setProperty("content.source.forever", "false");
     Config config = new Config(props);
+
+    ContentSource source = new LineDocSource();
+    source.setConfig(config);
+
     DocMaker dm = new DocMaker();
-    dm.setConfig(config);
+    dm.setConfig(config, source);
     dm.resetInputs();
     dm.resetInputs();
     dm.close();
diff --git a/lucene/build.xml b/lucene/build.xml
index 7e16b01354d..91d3d3ce84c 100644
--- a/lucene/build.xml
+++ b/lucene/build.xml
@@ -225,8 +225,13 @@
 [The XML markup in this hunk was lost in extraction; only the added script body survives:]
+              self.addMappedName((new java.io.File(source)).toURI());
diff --git a/solr/example/multicore/core0/conf/solrconfig.xml b/solr/example/multicore/core0/conf/solrconfig.xml
index 81c52bbe88e..d29101c22cf 100644
--- a/solr/example/multicore/core0/conf/solrconfig.xml
+++ b/solr/example/multicore/core0/conf/solrconfig.xml
@@ -29,16 +29,41 @@
 [XML markup lost in extraction. Recoverable fragments: the ${solr.core0.data.dir:} dataDir value is retained, and the added elements carry the values "true", "solrpingquery", "all", and "solr".]
diff --git a/solr/example/multicore/core1/conf/schema.xml b/solr/example/multicore/core1/conf/schema.xml
index d02865df407..5a27d39df2d 100644
--- a/solr/example/multicore/core1/conf/schema.xml
+++ b/solr/example/multicore/core1/conf/schema.xml
@@ -19,14 +19,16 @@
 [XML markup lost in extraction; no hunk content recoverable.]
diff --git a/solr/example/multicore/core1/conf/solrconfig.xml b/solr/example/multicore/core1/conf/solrconfig.xml
index 73e816f00e6..13c59fbf400 100644
--- a/solr/example/multicore/core1/conf/solrconfig.xml
+++ b/solr/example/multicore/core1/conf/solrconfig.xml
@@ -29,16 +29,41 @@
 [XML markup lost in extraction. Recoverable fragments mirror core0: ${solr.core1.data.dir:}, "true", "solrpingquery", "all", "solr".]
diff --git a/solr/example/multicore/solr.xml b/solr/example/multicore/solr.xml
index c6ed7e57983..ff97c67722b 100644
--- a/solr/example/multicore/solr.xml
+++ b/solr/example/multicore/solr.xml
@@ -28,7 +28,7 @@
    adminPath: RequestHandler path to manage cores.
     If 'null' (or absent), cores will not be manageable via request handler
   -->
 [the changed <cores> element in this hunk was lost in extraction]
diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml
index 787b078c119..d78a98e3ce8 100755
--- a/solr/example/solr/conf/solrconfig.xml
+++ b/solr/example/solr/conf/solrconfig.xml
@@ -1261,12 +1261,13 @@
        http://wiki.apache.org/solr/ClusteringComponent
 
-       You'll need to set the solr.cluster.enabled system property
+       You'll need to set the solr.cluster.enabled system property
        when running solr to run with clustering enabled:
 
-         java -Dsolr.clustering.enabled=true -jar start.jar
+         java -Dsolr.clustering.enabled=true -jar start.jar
+    -->
 [the remaining XML element changes in this hunk were lost in extraction]
diff --git a/solr/example/solr/conf/velocity/VM_global_library.vm b/solr/example/solr/conf/velocity/VM_global_library.vm
index 0756ab8dc03..eae1bfebfb8 100644
--- a/solr/example/solr/conf/velocity/VM_global_library.vm
+++ b/solr/example/solr/conf/velocity/VM_global_library.vm
@@ -1,7 +1,10 @@
 #macro(param $key)$request.params.get($key)#end
 
-#macro(url_for_solr)/solr#if($request.core.name != "")/$request.core.name#end#end
+#macro(url_root)/solr#end
+
+## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/
+#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end
 #macro(url_for_home)#url_for_solr/browse#end
 
 #macro(q)&q=$!{esc.url($params.get('q'))}#end
diff --git a/solr/example/solr/conf/velocity/head.vm b/solr/example/solr/conf/velocity/head.vm
index d240868772d..265b7d3f3d5 100644
--- a/solr/example/solr/conf/velocity/head.vm
+++ b/solr/example/solr/conf/velocity/head.vm
@@ -3,7 +3,7 @@
   #param('title')
 [the changed <link>/<script> markup in this hunk was lost in extraction]
diff --git a/solr/example/solr/conf/velocity/header.vm b/solr/example/solr/conf/velocity/header.vm
index 2eb80782fb0..46f0bab7d43 100644
--- a/solr/example/solr/conf/velocity/header.vm
+++ b/solr/example/solr/conf/velocity/header.vm
@@ -1,3 +1,3 @@
 [the changed markup in this hunk was lost in extraction]
\ No newline at end of file
diff --git a/solr/webapp/web/admin.html b/solr/webapp/web/admin.html
index db484c79c3f..a051ad4c07e 100644
--- a/solr/webapp/web/admin.html
+++ b/solr/webapp/web/admin.html
@@ -76,12 +76,30 @@ limitations under the License.
 [the admin.html hunk body (HTML markup) was lost in extraction]
\ No newline at end of file
diff --git a/solr/webapp/web/tpl/logging.html b/solr/webapp/web/tpl/logging.html
index 494d622e3a5..80671e5280d 100644
--- a/solr/webapp/web/tpl/logging.html
+++ b/solr/webapp/web/tpl/logging.html
@@ -20,13 +20,4 @@ limitations under the License.
 [the removed HTML markup in this hunk was lost in extraction]