From ac5e4dd3373f16772c3c0b85337d51871b0ca1a7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 9 Feb 2015 13:56:55 +0000 Subject: [PATCH] LUCENE-6224: cut over more package.htmls git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1658399 13f79535-47bb-0310-9956-ffa450edef68 --- .../analysis/morfologik/package-info.java | 29 + .../lucene/analysis/morfologik/package.html | 34 - .../analysis/phonetic/package-info.java | 21 + .../lucene/analysis/phonetic/package.html | 22 - .../analysis/cn/smart/hhmm/package-info.java | 22 + .../analysis/cn/smart/hhmm/package.html | 25 - .../analysis/cn/smart/package-info.java | 37 + .../lucene/analysis/cn/smart/package.html | 42 - .../lucene/analysis/pl/package-info.java | 21 + .../apache/lucene/analysis/pl/package.html | 22 - .../lucene/analysis/stempel/package-info.java | 21 + .../lucene/analysis/stempel/package.html | 22 - .../org/egothor/stemmer/package-info.java | 21 + .../src/java/org/egothor/stemmer/package.html | 22 - .../lucene/analysis/uima/ae/package-info.java | 21 + .../lucene/analysis/uima/ae/package.html | 21 - .../lucene/analysis/uima/package-info.java | 21 + .../apache/lucene/analysis/uima/package.html | 21 - .../org/apache/lucene/codecs/package.html | 1 + .../benchmark/byTask/feeds/package-info.java | 21 + .../benchmark/byTask/feeds/package.html | 23 - .../lucene/benchmark/byTask/package-info.java | 719 +++++++++++++++++ .../lucene/benchmark/byTask/package.html | 733 ------------------ .../byTask/programmatic/package-info.java | 21 + .../byTask/programmatic/package.html | 22 - .../benchmark/byTask/stats/package-info.java | 21 + .../benchmark/byTask/stats/package.html | 22 - .../benchmark/byTask/tasks/package-info.java | 21 + .../benchmark/byTask/tasks/package.html | 26 - .../benchmark/byTask/utils/package-info.java | 21 + .../benchmark/byTask/utils/package.html | 22 - .../apache/lucene/benchmark/package-info.java | 44 ++ .../org/apache/lucene/benchmark/package.html | 46 -- 
.../benchmark/quality/package-info.java | 80 ++ .../lucene/benchmark/quality/package.html | 83 -- .../benchmark/quality/trec/package-info.java | 21 + .../benchmark/quality/trec/package.html | 23 - .../benchmark/quality/utils/package-info.java | 21 + .../benchmark/quality/utils/package.html | 23 - .../lucene/benchmark/utils/package-info.java | 21 + .../lucene/benchmark/utils/package.html | 22 - 41 files changed, 1226 insertions(+), 1276 deletions(-) create mode 100644 lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java delete mode 100644 lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html create mode 100644 lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java delete mode 100644 lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html create mode 100644 lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java delete mode 100644 lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html create mode 100644 lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java delete mode 100644 lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html create mode 100644 lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java delete mode 100644 lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html create mode 100644 lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java delete mode 100644 lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html create mode 100644 lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java delete mode 100644 lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html create mode 100644 lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java delete mode 100644 
lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html create mode 100644 lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java delete mode 100644 lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html create mode 100755 lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java delete mode 100755 lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html create mode 100755 lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java delete mode 100755 
lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html create mode 100755 lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java delete mode 100755 lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java new file mode 100644 index 00000000000..9d58e1d44d1 --- /dev/null +++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides dictionary-driven lemmatization ("accurate stemming") + * filter and analyzer for the Polish Language, driven by the + * Morfologik library developed + * by Dawid Weiss and Marcin Miłkowski. + *

+ * The MorfologikFilter yields one or more terms for each token. Each + * of those terms is given the same position in the index. + *

+ */ +package org.apache.lucene.analysis.morfologik; + diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html deleted file mode 100644 index 6b67d0e4c34..00000000000 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - -

- This package provides dictionary-driven lemmatization ("accurate stemming") - filter and analyzer for the Polish Language, driven by the - Morfologik library developed - by Dawid Weiss and Marcin Miłkowski. -

-

- The MorfologikFilter yields one or more terms for each token. Each - of those terms is given the same position in the index. -

- - diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java new file mode 100644 index 00000000000..c710c9880b1 --- /dev/null +++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Analysis components for phonetic search. + */ +package org.apache.lucene.analysis.phonetic; diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html deleted file mode 100644 index 4e35a29f644..00000000000 --- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Analysis components for phonetic search. 
- - diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java new file mode 100644 index 00000000000..37d38b04ed1 --- /dev/null +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * SmartChineseAnalyzer Hidden Markov Model package. + * @lucene.experimental + */ +package org.apache.lucene.analysis.cn.smart.hhmm; diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html deleted file mode 100644 index e4eeb7fa7e7..00000000000 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - -SmartChineseAnalyzer Hidden Markov Model package. 
-@lucene.experimental - - diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java new file mode 100644 index 00000000000..d273a363922 --- /dev/null +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Analyzer for Simplified Chinese, which indexes words. + * @lucene.experimental + *
+ * Three analyzers are provided for Chinese, each of which treats Chinese text in a different way. + * + * + * Example phrase: "我是中国人" + *
    + *
  1. StandardAnalyzer: 我-是-中-国-人
  2. + *
  3. CJKAnalyzer: 我是-是中-中国-国人
  4. + *
  5. SmartChineseAnalyzer: 我-是-中国-人
  6. + *
+ *
+ */ +package org.apache.lucene.analysis.cn.smart; diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html deleted file mode 100644 index cc9b8c179f3..00000000000 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - -Analyzer for Simplified Chinese, which indexes words. -@lucene.experimental -
-Three analyzers are provided for Chinese, each of which treats Chinese text in a different way. - - -Example phrase: "我是中国人" -
    -
  1. StandardAnalyzer: 我-是-中-国-人
  2. -
  3. CJKAnalyzer: 我是-是中-中国-国人
  4. -
  5. SmartChineseAnalyzer: 我-是-中国-人
  6. -
-
- - - diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java new file mode 100644 index 00000000000..6a6cc80acb2 --- /dev/null +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Analyzer for Polish. + */ +package org.apache.lucene.analysis.pl; diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html deleted file mode 100644 index 1e7eabcba82..00000000000 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Analyzer for Polish. 
- - diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java new file mode 100644 index 00000000000..db125cd8d75 --- /dev/null +++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Stempel: Algorithmic Stemmer + */ +package org.apache.lucene.analysis.stempel; diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html deleted file mode 100644 index cf143f0c48f..00000000000 --- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -

Stempel: Algorithmic Stemmer

- - diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java new file mode 100644 index 00000000000..395aa659d13 --- /dev/null +++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Egothor stemmer API. + */ +package org.egothor.stemmer; diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html deleted file mode 100644 index 4d3a193260c..00000000000 --- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Egothor stemmer API. 
- - diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java new file mode 100644 index 00000000000..e08da29bbb6 --- /dev/null +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Integration with UIMA's AnalysisEngine. 
+ */ +package org.apache.lucene.analysis.uima.ae; diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html deleted file mode 100644 index e040bdfb3bf..00000000000 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html +++ /dev/null @@ -1,21 +0,0 @@ - - - -Integration with UIMA's AnalysisEngine - - diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java new file mode 100644 index 00000000000..0baa0fc8530 --- /dev/null +++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Classes that integrate UIMA with Lucene's analysis API. 
+ */ +package org.apache.lucene.analysis.uima; diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html deleted file mode 100644 index 5690c580a83..00000000000 --- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html +++ /dev/null @@ -1,21 +0,0 @@ - - - -Classes that integrate UIMA with Lucene's analysis API. - - diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html index 6defdbe769e..fe01fff7b0b 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html @@ -15,6 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. --> + diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java new file mode 100644 index 00000000000..8df0337404f --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Sources for benchmark inputs: documents and queries. + */ +package org.apache.lucene.benchmark.byTask.feeds; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html deleted file mode 100644 index 3feb9e35a41..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - -Sources for benchmark inputs: documents and queries. - - - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java new file mode 100644 index 00000000000..465557aba5b --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java @@ -0,0 +1,719 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Benchmarking Lucene By Tasks + *

+ * This package provides "task based" performance benchmarking of Lucene. + * One can use the predefined benchmarks, or create new ones. + *

+ *

+ * Contained packages: + *

+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
PackageDescription
statsStatistics maintained when running benchmark tasks.
tasksBenchmark tasks.
feedsSources for benchmark inputs: documents and queries.
utilsUtilities used for the benchmark, and for the reports.
programmaticSample performance test written programmatically.
+ * + *

Table Of Contents

+ *
    + *
  1. Benchmarking By Tasks
  2. + *
  3. How to use
  4. + *
  5. Benchmark "algorithm"
  6. + *
  7. Supported tasks/commands
  8. + *
  9. Benchmark properties
  10. + *
  11. Example input algorithm and the result benchmark + * report.
  12. + *
  13. Results record counting clarified
  14. + *
+ * + *

Benchmarking By Tasks

+ *

+ * Benchmark Lucene using task primitives. + *

+ * + *

+ * A benchmark is composed of some predefined tasks, allowing for creating an + * index, adding documents, + * optimizing, searching, generating reports, and more. A benchmark run takes an + * "algorithm" file + * that contains a description of the sequence of tasks making up the run, and some + * properties defining a few + * additional characteristics of the benchmark run. + *

+ * + * + *

How to use

+ *

+ * The easiest way to run a benchmark is to use the predefined ant task: + *

+ * + *

+ * You may find existing tasks sufficient for defining the benchmark you + * need, otherwise, you can extend the framework to meet your needs, as explained + * herein. + *

+ * + *

+ * Each benchmark run has a DocMaker and a QueryMaker. These two should usually + * match, so that "meaningful" queries are used for a certain collection. + * Properties set at the header of the alg file define which "makers" should be + * used. You can also specify your own makers, extending DocMaker and implementing + * QueryMaker. + *

+ * Note: since 2.9, DocMaker is a concrete class which accepts a + * ContentSource. In most cases, you can use the DocMaker class to create + * Documents, while providing your own ContentSource implementation. For + * example, the current Benchmark package includes ContentSource + * implementations for TREC, Enwiki and Reuters collections, as well as + * others like LineDocSource which reads a 'line' file produced by + * WriteLineDocTask. + *
+ * + *

+ * Benchmark .alg file contains the benchmark "algorithm". The syntax is described + * below. Within the algorithm, you can specify groups of commands, assign them + * names, specify commands that should be repeated, + * do commands in serial or in parallel, + * and also control the speed of "firing" the commands. + *

+ * + *

+ * This allows you, for instance, to specify + * that an index should be opened for update, + * documents should be added to it one by one but not faster than 20 docs a minute, + * and, in parallel with this, + * some N queries should be searched against that index, + * again, no more than 2 queries a second. + * You can have the searches all share an index reader, + * or have them each open its own reader and close it afterwards. + *

+ * + *

+ * If the commands available for use in the algorithm do not meet your needs, + * you can add commands by adding a new task under + * org.apache.lucene.benchmark.byTask.tasks - + * you should extend the PerfTask abstract class. + * Make sure that your new task class name is suffixed by Task. + * Assume you added the class "WonderfulTask" - doing so also enables the + * command "Wonderful" to be used in the algorithm. + *

+ * + *

+ * External classes: It is sometimes useful to invoke the benchmark + * package with your external alg file that configures the use of your own + * doc/query maker and/or html parser. You can work this out without + * modifying the benchmark package code, by passing your class path + * with the benchmark.ext.classpath property: + *

+ *

+ * External tasks: When writing your own tasks under a package other than + * org.apache.lucene.benchmark.byTask.tasks, specify that package through the + * alt.tasks.packages property. + * + * + *

Benchmark "algorithm"

+ * + *

+ * The following is an informal description of the supported syntax. + *

+ * + *
    + *
  1. + * Measuring: When a command is executed, statistics for the elapsed + * execution time and memory consumption are collected. + * At any time, those statistics can be printed, using one of the + * available ReportTasks. + *
  2. + *
  3. + * Comments start with '#'. + *
  4. + *
  5. + * Serial sequences are enclosed within '{ }'. + *
  6. + *
  7. + * Parallel sequences are enclosed within + * '[ ]' + *
  8. + *
  9. + * Sequence naming: To name a sequence, put + * '"name"' just after + * '{' or '['. + *
    Example - { "ManyAdds" AddDoc } : 1000000 - + * would + * name the sequence of 1M add docs "ManyAdds", and this name would later appear + * in statistic reports. + * If you don't specify a name for a sequence, it is given one: you can see it as + * the algorithm is printed just before benchmark execution starts. + *
  10. + *
  11. + * Repeating: + * To repeat sequence tasks N times, add ': N' just + * after the + * sequence closing tag - '}' or + * ']' or '>'. + *
    Example - [ AddDoc ] : 4 - would do 4 addDoc + * in parallel, spawning 4 threads at once. + *
    Example - [ AddDoc AddDoc ] : 4 - would do + * 8 addDoc in parallel, spawning 8 threads at once. + *
    Example - { AddDoc } : 30 - would do addDoc + * 30 times in a row. + *
    Example - { AddDoc AddDoc } : 30 - would do + * addDoc 60 times in a row. + *
    Exhaustive repeating: use * instead of + * a number to repeat exhaustively. + * This is sometimes useful, for adding as many files as a doc maker can create, + * without iterating over the same file again, especially when the exact + * number of documents is not known in advance. For instance, TREC files extracted + * from a zip file. Note: when using this, you must also set + * content.source.forever to false. + *
    Example - { AddDoc } : * - would add docs + * until the doc maker is "exhausted". + *
  12. + *
  13. + * Command parameter: a command can optionally take a single parameter. + * If a certain command does not support a parameter, or if the parameter is of + * the wrong type, + * reading the algorithm will fail with an exception and the test would not start. + * Currently the following tasks take optional parameters: + *
      + *
    • AddDoc takes a numeric parameter, indicating the required size of + * added document. Note: if the DocMaker implementation used in the test + * does not support makeDoc(size), an exception would be thrown and the test + * would fail. + *
    • + *
    • DeleteDoc takes a numeric parameter, indicating the docid to be + * deleted. The latter is not very useful for loops, since the docid is + * fixed, so for deletion in loops it is better to use the + * doc.delete.step property. + *
    • + *
    • SetProp takes a name,value mandatory param, + * ',' used as a separator. + *
    • + *
    • SearchTravRetTask and SearchTravTask take a numeric + * parameter, indicating the required traversal size. + *
    • + *
    • SearchTravRetLoadFieldSelectorTask takes a string + * parameter: a comma separated list of Fields to load. + *
    • + *
    • SearchTravRetHighlighterTask takes a string + * parameter: a comma separated list of parameters to define highlighting. See that + * task's javadocs for more information. + *
    • + *
    + *
    Example - AddDoc(2000) - would add a document + * of size 2000 (~bytes). + *
    See conf/task-sample.alg for how this can be used, for instance, to check + * which is faster, adding + * many smaller documents, or few larger documents. + * Next candidates for supporting a parameter may be the Search tasks, + * for controlling the query size. + *
  14. + *
  15. + * Statistic recording elimination: - a sequence can also end with + * '>', + * in which case child tasks would not store their statistics. + * This can be useful to avoid exploding stats data, for adding say 1M docs. + *
    Example - { "ManyAdds" AddDoc > : 1000000 - + * would add million docs, measure that total, but not save stats for each addDoc. + *
    Notice that the granularity of System.currentTimeMillis() (which is used + * here) is system dependent, + * and in some systems an operation that takes 5 ms to complete may show 0 ms + * latency time in performance measurements. + * Therefore it is sometimes more accurate to look at the elapsed time of a larger + * sequence, as demonstrated here. + *
  16. + *
  17. + * Rate: + * To set a rate (ops/sec or ops/min) for a sequence, add + * ': N : R' just after sequence closing tag. + * This would specify repetition of N with rate of R operations/sec. + * Use 'R/sec' or + * 'R/min' + * to explicitly specify that the rate is per second or per minute. + * The default is per second. + *
    Example - [ AddDoc ] : 400 : 3 - would do 400 + * addDoc in parallel, starting up to 3 threads per second. + *
    Example - { AddDoc } : 100 : 200/min - would + * do 100 addDoc serially, + * waiting before starting next add, if otherwise rate would exceed 200 adds/min. + *
  18. + *
  19. + * Disable Counting: Each task executed contributes to the records count. + * This count is reflected in reports under recs/s and under recsPerRun. + * Most tasks count 1, some count 0, and some count more. + * (See Results record counting clarified for more details.) + * It is possible to disable counting for a task by preceding it with -. + *
    Example - -CreateIndex - would count 0 while + * the default behavior for CreateIndex is to count 1. + *
  20. + *
  21. + * Command names: Each class "AnyNameTask" in the + * package org.apache.lucene.benchmark.byTask.tasks, + * that extends PerfTask, is supported as command "AnyName" that can be + * used in the benchmark "algorithm" description. + * This allows adding new commands by just adding such classes. + *
  22. + *
+ * + * + * + *

Supported tasks/commands

+ * + *

+ * Existing tasks can be divided into a few groups: + * regular index/search work tasks, report tasks, and control tasks. + *

+ * + *
    + * + *
  1. + * Report tasks: There are a few Report commands for generating reports. + * Only task runs that were completed are reported. + * (The 'Report tasks' themselves are not measured and not reported.) + *
      + *
    • + * RepAll - all (completed) task runs. + *
    • + *
    • + * RepSumByName - all statistics, + * aggregated by name. So, if AddDoc was executed 2000 times, + * only 1 report line would be created for it, aggregating all those + * 2000 statistic records. + *
    • + *
    • + * RepSelectByPref   prefixWord - all + * records for tasks whose name start with + * prefixWord. + *
    • + *
    • + * RepSumByPref   prefixWord - all + * records for tasks whose name start with + * prefixWord, + * aggregated by their full task name. + *
    • + *
    • + * RepSumByNameRound - all statistics, + * aggregated by name and by Round. + * So, if AddDoc was executed 2000 times in each of 3 + * rounds, 3 report lines would be + * created for it, + * aggregating all those 2000 statistic records in each round. + * See more about rounds in the NewRound + * command description below. + *
    • + *
    • + * RepSumByPrefRound   prefixWord - + * similar to RepSumByNameRound, + * just that only tasks whose name starts with + * prefixWord are included. + *
    • + *
    + * If needed, additional reports can be added by extending the abstract class + * ReportTask, and by + * manipulating the statistics data in Points and TaskStats. + *
  2. + * + *
  3. Control tasks: Few of the tasks control the benchmark algorithm + * all over: + *
      + *
    • + * ClearStats - clears the entire statistics. + * Further reports would only include task runs that would start after this + * call. + *
    • + *
    • + * NewRound - virtually start a new round of + * performance test. + * Although this command can be placed anywhere, it mostly makes sense at + * the end of an outermost sequence. + *
      This increments a global "round counter". All task runs that + * would start now would + * record the new, updated round counter as their round number. + * This would appear in reports. + * In particular, see RepSumByNameRound above. + *
      An additional effect of NewRound, is that numeric and boolean + * properties defined (at the head + * of the .alg file) as a sequence of values, e.g. + * merge.factor=mrg:10:100:10:100 would + * increment (cyclic) to the next value. + * Note: this would also be reflected in the reports, in this case under a + * column that would be named "mrg". + *
    • + *
    • + * ResetInputs - DocMaker and the + * various QueryMakers + * would reset their counters to start. + * The way these Maker interfaces work, each call for makeDocument() + * or makeQuery() creates the next document or query + * that it "knows" to create. + * If that pool is "exhausted", the "maker" starts over again. + * The ResetInputs command + * therefore makes it possible to make the rounds comparable. + * It is therefore useful to invoke ResetInputs together with NewRound. + *
    • + *
    • + * ResetSystemErase - reset all index + * and input data and call gc. + * Does NOT reset statistics. This contains ResetInputs. + * All writers/readers are nullified, deleted, closed. + * Index is erased. + * Directory is erased. + * You would have to call CreateIndex once this was called... + *
    • + *
    • + * ResetSystemSoft - reset all + * index and input data and call gc. + * Does NOT reset statistics. This contains ResetInputs. + * All writers/readers are nullified, closed. + * Index is NOT erased. + * Directory is NOT erased. + * This is useful for testing performance on an existing index, + * for instance if the construction of a large index + * took a very long time and now you would like to test + * its search or update performance. + *
    • + *
    + *
  4. + * + *
  5. + * Other existing tasks are quite straightforward and would + * just be briefly described here. + *
      + *
    • + * CreateIndex and + * OpenIndex both leave the + * index open for later update operations. + * CloseIndex would close it. + *
    • + * OpenReader, similarly, would + * leave an index reader open for later search operations. + * But this has further semantics. + * If a Read operation is performed, and an open reader exists, + * it would be used. + * Otherwise, the read operation would open its own reader + * and close it when the read operation is done. + * This allows testing various scenarios - sharing a reader, + * searching with "cold" reader, with "warmed" reader, etc. + * The read operations affected by this are: + * Warm, + * Search, + * SearchTrav (search and traverse), + * and SearchTravRet (search + * and traverse and retrieve). + * Notice that each of the 3 search task types maintains + * its own queryMaker instance. + *
    • + * CommitIndex and + * ForceMerge can be used to commit + * changes to the index then merge the index segments. The integer + * parameter specifies how many segments to merge down to (default + * 1). + *
    • + * WriteLineDoc prepares a 'line' + * file where each line holds a document with title, + * date and body elements, separated by [TAB]. + * A line file is useful if one wants to measure pure indexing + * performance, without the overhead of parsing the data.
      + * You can use LineDocSource as a ContentSource over a 'line' + * file. + *
    • + * ConsumeContentSource consumes + * a ContentSource. Useful for e.g. testing a ContentSource + * performance, without the overhead of preparing a Document + * out of it. + *
    + *
  6. + *
+ * + * + *

Benchmark properties

+ * + *

+ * Properties are read from the header of the .alg file, and + * define several parameters of the performance test. + * As mentioned above for the NewRound task, + * numeric and boolean properties that are defined as a sequence + * of values, e.g. merge.factor=mrg:10:100:10:100 + * would increment (cyclic) to the next value, + * when NewRound is called, and would also + * appear as a named column in the reports (column + * name would be "mrg" in this example). + *

+ * + *

+ * Some of the currently defined properties are: + *

+ * + *
    + *
  1. + * analyzer - full + * class name for the analyzer to use. + * Same analyzer would be used in the entire test. + *
  2. + * + *
  3. + * directory - valid values are + * This tells which directory to use for the performance test. + *
  4. + * + *
  5. + * Index work parameters: + * Multi int/boolean values would be iterated with calls to NewRound. + * They would also be added as columns in the reports; the first string in the + * sequence is the column name. + * (Make sure it is no shorter than any value in the sequence). + *
      + *
    • max.buffered + *
      Example: max.buffered=buf:10:10:100:100 - + * this would define using maxBufferedDocs of 10 in iterations 0 and 1, + * and 100 in iterations 2 and 3. + *
    • + *
    • + * merge.factor - which + * merge factor to use. + *
    • + *
    • + * compound - whether the index is + * using the compound format or not. Valid values are "true" and "false". + *
    • + *
    + *
+ * + *

+ * Here is a list of currently defined properties: + *

+ *
    + * + *
  1. Root directory for data and indexes: + *
    • work.dir (default is System property "benchmark.work.dir" or "work".) + *
    + *
  2. + * + *
  3. Docs and queries creation: + *
    • analyzer + *
    • doc.maker + *
    • content.source.forever + *
    • html.parser + *
    • doc.stored + *
    • doc.tokenized + *
    • doc.term.vector + *
    • doc.term.vector.positions + *
    • doc.term.vector.offsets + *
    • doc.store.body.bytes + *
    • docs.dir + *
    • query.maker + *
    • file.query.maker.file + *
    • file.query.maker.default.field + *
    • search.num.hits + *
    + *
  4. + * + *
  5. Logging: + *
    • log.step + *
    • log.step.[class name]Task i.e. log.step.DeleteDoc (e.g. log.step.Wonderful for the WonderfulTask example above). + *
    • log.queries + *
    • task.max.depth.log + *
    + *
  6. + * + *
  7. Index writing: + *
    • compound + *
    • merge.factor + *
    • max.buffered + *
    • directory + *
    • ram.flush.mb + *
    • codec.postingsFormat (eg Direct) Note: no codec should be specified through default.codec + *
    + *
  8. + * + *
  9. Doc deletion: + *
    • doc.delete.step + *
    + *
  10. + * + *
  11. Spatial: Numerous; see spatial.alg + *
  12. + * + *
  13. Task alternative packages: + *
    • alt.tasks.packages + * - comma separated list of additional packages where tasks classes will be looked for + * when not found in the default package (that of PerfTask). If the same task class + * appears in more than one package, the package indicated first in this list will be used. + *
    + *
  14. + * + *
+ * + *

+ * For sample use of these properties see the *.alg files under conf. + *

+ * + * + *

Example input algorithm and the result benchmark report

+ *

+ * The following example is in conf/sample.alg: + *

+ * # --------------------------------------------------------
+ * #
+ * # Sample: what is the effect of doc size on indexing time?
+ * #
+ * # There are two parts in this test:
+ * # - PopulateShort adds 2N documents of length  L
+ * # - PopulateLong  adds  N documents of length 2L
+ * # Which one would be faster?
+ * # The comparison is done twice.
+ * #
+ * # --------------------------------------------------------
+ * 
+ * # -------------------------------------------------------------------------------------
+ * # multi val params are iterated by NewRound's, added to reports, start with column name.
+ * merge.factor=mrg:10:20
+ * max.buffered=buf:100:1000
+ * compound=true
+ * 
+ * analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+ * directory=FSDirectory
+ * 
+ * doc.stored=true
+ * doc.tokenized=true
+ * doc.term.vector=false
+ * doc.add.log.step=500
+ * 
+ * docs.dir=reuters-out
+ * 
+ * doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+ * 
+ * query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+ * 
+ * # task at this depth or less would print when they start
+ * task.max.depth.log=2
+ * 
+ * log.queries=false
+ * # -------------------------------------------------------------------------------------
+ * {
+ * 
+ *     { "PopulateShort"
+ *         CreateIndex
+ *         { AddDoc(4000) > : 20000
+ *         Optimize
+ *         CloseIndex
+ *     >
+ * 
+ *     ResetSystemErase
+ * 
+ *     { "PopulateLong"
+ *         CreateIndex
+ *         { AddDoc(8000) > : 10000
+ *         Optimize
+ *         CloseIndex
+ *     >
+ * 
+ *     ResetSystemErase
+ * 
+ *     NewRound
+ * 
+ * } : 2
+ * 
+ * RepSumByName
+ * RepSelectByPref Populate
+ * 
+ * 
+ * + *

+ * The command line for running this sample: + *
ant run-task -Dtask.alg=conf/sample.alg + *

+ * + *

+ * The output report from running this test contains the following: + *

+ * Operation     round mrg  buf   runCnt   recsPerRun        rec/s  elapsedSec    avgUsedMem    avgTotalMem
+ * PopulateShort     0  10  100        1        20003        119.6      167.26    12,959,120     14,241,792
+ * PopulateLong -  - 0  10  100 -  -   1 -  -   10003 -  -  - 74.3 -  - 134.57 -  17,085,208 -   20,635,648
+ * PopulateShort     1  20 1000        1        20003        143.5      139.39    63,982,040     94,756,864
+ * PopulateLong -  - 1  20 1000 -  -   1 -  -   10003 -  -  - 77.0 -  - 129.92 -  87,309,608 -  100,831,232
+ * 
+ * + * + *

Results record counting clarified

+ *

+ * Two columns in the results table indicate records counts: records-per-run and + * records-per-second. What does it mean? + *

+ * Almost every task gets 1 in this count just for being executed. + * Task sequences aggregate the counts of their child tasks, + * plus their own count of 1. + * So, a task sequence containing 5 other task sequences, each running a single + * other task 10 times, would have a count of 1 + 5 * (1 + 10) = 56. + *

+ * The traverse and retrieve tasks "count" more: a traverse task + * would add 1 for each traversed result (hit), and a retrieve task would + * additionally add 1 for each retrieved doc. So, regular Search would + * count 1, SearchTrav that traverses 10 hits would count 11, and a + * SearchTravRet task that retrieves (and traverses) 10, would count 21. + *

+ * Confusing? This might help: always examine the elapsedSec column, + * and always compare "apples to apples", i.e., it is interesting to check how the + * rec/s changed for the same task (or sequence) between two + * different runs, but it is not very useful to know how the rec/s + * differs between Search and SearchTrav tasks. For + * the latter, elapsedSec would bring more insight. + *

+ */ +package org.apache.lucene.benchmark.byTask; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html deleted file mode 100644 index 7c0967180cb..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html +++ /dev/null @@ -1,733 +0,0 @@ - - - - - Benchmarking Lucene By Tasks - - -Benchmarking Lucene By Tasks. -
-

-This package provides "task based" performance benchmarking of Lucene. -One can use the predefined benchmarks, or create new ones. -

-

-Contained packages: -

- - - - - - - - - - - - - - - - - - - - - - - - - - -
PackageDescription
statsStatistics maintained when running benchmark tasks.
tasksBenchmark tasks.
feedsSources for benchmark inputs: documents and queries.
utilsUtilities used for the benchmark, and for the reports.
programmaticSample performance test written programmatically.
- -

Table Of Contents

-

-

    -
  1. Benchmarking By Tasks
  2. -
  3. How to use
  4. -
  5. Benchmark "algorithm"
  6. -
  7. Supported tasks/commands
  8. -
  9. Benchmark properties
  10. -
  11. Example input algorithm and the result benchmark - report.
  12. -
  13. Results record counting clarified
  14. -
-

- -

Benchmarking By Tasks

-

-Benchmark Lucene using task primitives. -

- -

-A benchmark is composed of some predefined tasks, allowing for creating an -index, adding documents, -optimizing, searching, generating reports, and more. A benchmark run takes an -"algorithm" file -that contains a description of the sequence of tasks making up the run, and some -properties defining a few -additional characteristics of the benchmark run. -

- - -

How to use

-

-Easiest way to run a benchmarks is using the predefined ant task: -

-

- -

-You may find existing tasks sufficient for defining the benchmark you -need, otherwise, you can extend the framework to meet your needs, as explained -herein. -

- -

-Each benchmark run has a DocMaker and a QueryMaker. These two should usually -match, so that "meaningful" queries are used for a certain collection. -Properties set at the header of the alg file define which "makers" should be -used. You can also specify your own makers, extending DocMaker and implementing -QueryMaker. -

- Note: since 2.9, DocMaker is a concrete class which accepts a - ContentSource. In most cases, you can use the DocMaker class to create - Documents, while providing your own ContentSource implementation. For - example, the current Benchmark package includes ContentSource - implementations for TREC, Enwiki and Reuters collections, as well as - others like LineDocSource which reads a 'line' file produced by - WriteLineDocTask. -
-

- -

-Benchmark .alg file contains the benchmark "algorithm". The syntax is described -below. Within the algorithm, you can specify groups of commands, assign them -names, specify commands that should be repeated, -do commands in serial or in parallel, -and also control the speed of "firing" the commands. -

- -

-This allows, for instance, to specify -that an index should be opened for update, -documents should be added to it one by one but not faster than 20 docs a minute, -and, in parallel with this, -some N queries should be searched against that index, -again, no more than 2 queries a second. -You can have the searches all share an index reader, -or have them each open its own reader and close it afterwords. -

- -

-If the commands available for use in the algorithm do not meet your needs, -you can add commands by adding a new task under -org.apache.lucene.benchmark.byTask.tasks - -you should extend the PerfTask abstract class. -Make sure that your new task class name is suffixed by Task. -Assume you added the class "WonderfulTask" - doing so also enables the -command "Wonderful" to be used in the algorithm. -

- -

-External classes: It is sometimes useful to invoke the benchmark -package with your external alg file that configures the use of your own -doc/query maker and or html parser. You can work this out without -modifying the benchmark package code, by passing your class path -with the benchmark.ext.classpath property: -

-External tasks: When writing your own tasks under a package other than -org.apache.lucene.benchmark.byTask.tasks specify that package thru the -alt.tasks.packages property. -

- - -

Benchmark "algorithm"

- -

-The following is an informal description of the supported syntax. -

- -
    -
  1. - Measuring: When a command is executed, statistics for the elapsed - execution time and memory consumption are collected. - At any time, those statistics can be printed, using one of the - available ReportTasks. -
  2. -
  3. - Comments start with '#'. -
  4. -
  5. - Serial sequences are enclosed within '{ }'. -
  6. -
  7. - Parallel sequences are enclosed within - '[ ]' -
  8. -
  9. - Sequence naming: To name a sequence, put - '"name"' just after - '{' or '['. -
    Example - { "ManyAdds" AddDoc } : 1000000 - - would - name the sequence of 1M add docs "ManyAdds", and this name would later appear - in statistic reports. - If you don't specify a name for a sequence, it is given one: you can see it as - the algorithm is printed just before benchmark execution starts. -
  10. -
  11. - Repeating: - To repeat sequence tasks N times, add ': N' just - after the - sequence closing tag - '}' or - ']' or '>'. -
    Example - [ AddDoc ] : 4 - would do 4 addDoc - in parallel, spawning 4 threads at once. -
    Example - [ AddDoc AddDoc ] : 4 - would do - 8 addDoc in parallel, spawning 8 threads at once. -
    Example - { AddDoc } : 30 - would do addDoc - 30 times in a row. -
    Example - { AddDoc AddDoc } : 30 - would do - addDoc 60 times in a row. -
    Exhaustive repeating: use * instead of - a number to repeat exhaustively. - This is sometimes useful, for adding as many files as a doc maker can create, - without iterating over the same file again, especially when the exact - number of documents is not known in advance. For instance, TREC files extracted - from a zip file. Note: when using this, you must also set - content.source.forever to false. -
    Example - { AddDoc } : * - would add docs - until the doc maker is "exhausted". -
  12. -
  13. - Command parameter: a command can optionally take a single parameter. - If the certain command does not support a parameter, or if the parameter is of - the wrong type, - reading the algorithm will fail with an exception and the test would not start. - Currently the following tasks take optional parameters: -
      -
    • AddDoc takes a numeric parameter, indicating the required size of - added document. Note: if the DocMaker implementation used in the test - does not support makeDoc(size), an exception would be thrown and the test - would fail. -
    • -
    • DeleteDoc takes numeric parameter, indicating the docid to be - deleted. The latter is not very useful for loops, since the docid is - fixed, so for deletion in loops it is better to use the - doc.delete.step property. -
    • -
    • SetProp takes a name,value mandatory param, - ',' used as a separator. -
    • -
    • SearchTravRetTask and SearchTravTask take a numeric - parameter, indicating the required traversal size. -
    • -
    • SearchTravRetLoadFieldSelectorTask takes a string - parameter: a comma separated list of Fields to load. -
    • -
    • SearchTravRetHighlighterTask takes a string - parameter: a comma separated list of parameters to define highlighting. See that - tasks javadocs for more information -
    • -
    -
    Example - AddDoc(2000) - would add a document - of size 2000 (~bytes). -
    See conf/task-sample.alg for how this can be used, for instance, to check - which is faster, adding - many smaller documents, or few larger documents. - Next candidates for supporting a parameter may be the Search tasks, - for controlling the query size. -
  14. -
  15. - Statistic recording elimination: - a sequence can also end with - '>', - in which case child tasks would not store their statistics. - This can be useful to avoid exploding stats data, for adding say 1M docs. -
    Example - { "ManyAdds" AddDoc > : 1000000 - - would add million docs, measure that total, but not save stats for each addDoc. -
    Notice that the granularity of System.currentTimeMillis() (which is used - here) is system dependant, - and in some systems an operation that takes 5 ms to complete may show 0 ms - latency time in performance measurements. - Therefore it is sometimes more accurate to look at the elapsed time of a larger - sequence, as demonstrated here. -
  16. -
  17. - Rate: - To set a rate (ops/sec or ops/min) for a sequence, add - ': N : R' just after sequence closing tag. - This would specify repetition of N with rate of R operations/sec. - Use 'R/sec' or - 'R/min' - to explicitly specify that the rate is per second or per minute. - The default is per second, -
    Example - [ AddDoc ] : 400 : 3 - would do 400 - addDoc in parallel, starting up to 3 threads per second. -
    Example - { AddDoc } : 100 : 200/min - would - do 100 addDoc serially, - waiting before starting next add, if otherwise rate would exceed 200 adds/min. -
  18. -
  19. - Disable Counting: Each task executed contributes to the records count. - This count is reflected in reports under recs/s and under recsPerRun. - Most tasks count 1, some count 0, and some count more. - (See Results record counting clarified for more details.) - It is possible to disable counting for a task by preceding it with -. -
    Example - -CreateIndex - would count 0 while - the default behavior for CreateIndex is to count 1. -
  20. -
  21. - Command names: Each class "AnyNameTask" in the - package org.apache.lucene.benchmark.byTask.tasks, - that extends PerfTask, is supported as command "AnyName" that can be - used in the benchmark "algorithm" description. - This allows to add new commands by just adding such classes. -
  22. -
- - - -

Supported tasks/commands

- -

-Existing tasks can be divided into a few groups: -regular index/search work tasks, report tasks, and control tasks. -

- -
    - -
  1. - Report tasks: There are a few Report commands for generating reports. - Only task runs that were completed are reported. - (The 'Report tasks' themselves are not measured and not reported.) -
      -
    • - RepAll - all (completed) task runs. -
    • -
    • - RepSumByName - all statistics, - aggregated by name. So, if AddDoc was executed 2000 times, - only 1 report line would be created for it, aggregating all those - 2000 statistic records. -
    • -
    • - RepSelectByPref   prefixWord - all - records for tasks whose name start with - prefixWord. -
    • -
    • - RepSumByPref   prefixWord - all - records for tasks whose name start with - prefixWord, - aggregated by their full task name. -
    • -
    • - RepSumByNameRound - all statistics, - aggregated by name and by Round. - So, if AddDoc was executed 2000 times in each of 3 - rounds, 3 report lines would be - created for it, - aggregating all those 2000 statistic records in each round. - See more about rounds in the NewRound - command description below. -
    • -
    • - RepSumByPrefRound   prefixWord - - similar to RepSumByNameRound, - just that only tasks whose name starts with - prefixWord are included. -
    • -
    - If needed, additional reports can be added by extending the abstract class - ReportTask, and by - manipulating the statistics data in Points and TaskStats. -
  2. - -
  3. Control tasks: Few of the tasks control the benchmark algorithm - all over: -
      -
    • - ClearStats - clears the entire statistics. - Further reports would only include task runs that would start after this - call. -
    • -
    • - NewRound - virtually start a new round of - performance test. - Although this command can be placed anywhere, it mostly makes sense at - the end of an outermost sequence. -
      This increments a global "round counter". All task runs that - would start now would - record the new, updated round counter as their round number. - This would appear in reports. - In particular, see RepSumByNameRound above. -
      An additional effect of NewRound, is that numeric and boolean - properties defined (at the head - of the .alg file) as a sequence of values, e.g. - merge.factor=mrg:10:100:10:100 would - increment (cyclic) to the next value. - Note: this would also be reflected in the reports, in this case under a - column that would be named "mrg". -
    • -
    • - ResetInputs - DocMaker and the - various QueryMakers - would reset their counters to start. - The way these Maker interfaces work, each call for makeDocument() - or makeQuery() creates the next document or query - that it "knows" to create. - If that pool is "exhausted", the "maker" start over again. - The ResetInputs command - therefore allows to make the rounds comparable. - It is therefore useful to invoke ResetInputs together with NewRound. -
    • -
    • - ResetSystemErase - reset all index - and input data and call gc. - Does NOT reset statistics. This contains ResetInputs. - All writers/readers are nullified, deleted, closed. - Index is erased. - Directory is erased. - You would have to call CreateIndex once this was called... -
    • -
    • - ResetSystemSoft - reset all - index and input data and call gc. - Does NOT reset statistics. This contains ResetInputs. - All writers/readers are nullified, closed. - Index is NOT erased. - Directory is NOT erased. - This is useful for testing performance on an existing index, - for instance if the construction of a large index - took a very long time and now you would to test - its search or update performance. -
    • -
    -
  4. - -
  5. - Other existing tasks are quite straightforward and would - just be briefly described here. -
      -
    • - CreateIndex and - OpenIndex both leave the - index open for later update operations. - CloseIndex would close it. -
    • - OpenReader, similarly, would - leave an index reader open for later search operations. - But this have further semantics. - If a Read operation is performed, and an open reader exists, - it would be used. - Otherwise, the read operation would open its own reader - and close it when the read operation is done. - This allows testing various scenarios - sharing a reader, - searching with "cold" reader, with "warmed" reader, etc. - The read operations affected by this are: - Warm, - Search, - SearchTrav (search and traverse), - and SearchTravRet (search - and traverse and retrieve). - Notice that each of the 3 search task types maintains - its own queryMaker instance. -
    • - CommitIndex and - ForceMerge can be used to commit - changes to the index then merge the index segments. The integer - parameter specifies how many segments to merge down to (default - 1). -
    • - WriteLineDoc prepares a 'line' - file where each line holds a document with title, - date and body elements, separated by [TAB]. - A line file is useful if one wants to measure pure indexing - performance, without the overhead of parsing the data.
      - You can use LineDocSource as a ContentSource over a 'line' - file. -
    • - ConsumeContentSource consumes - a ContentSource. Useful for e.g. testing a ContentSource - performance, without the overhead of preparing a Document - out of it. -
    -
  6. -
- - -

Benchmark properties

- -

-Properties are read from the header of the .alg file, and -define several parameters of the performance test. -As mentioned above for the NewRound task, -numeric and boolean properties that are defined as a sequence -of values, e.g. merge.factor=mrg:10:100:10:100 -would increment (cyclic) to the next value, -when NewRound is called, and would also -appear as a named column in the reports (column -name would be "mrg" in this example). -

- -

-Some of the currently defined properties are: -

- -
    -
  1. - analyzer - full - class name for the analyzer to use. - Same analyzer would be used in the entire test. -
  2. - -
  3. - directory - valid values include FSDirectory (as used in the sample algorithm below). - This tells which directory to use for the performance test. -
  4. - -
  5. - Index work parameters: - Multi int/boolean values would be iterated with calls to NewRound. - They would also be added as columns in the reports; the first string in the - sequence is the column name. - (Make sure it is no shorter than any value in the sequence). -
      -
    • max.buffered -
      Example: max.buffered=buf:10:10:100:100 - - this would define using maxBufferedDocs of 10 in iterations 0 and 1, - and 100 in iterations 2 and 3. -
    • -
    • - merge.factor - which - merge factor to use. -
    • -
    • - compound - whether the index is - using the compound format or not. Valid values are "true" and "false". -
    • -
    -
- -

-Here is a list of currently defined properties: -

-
    - -
  1. Root directory for data and indexes:
  2. -
    • work.dir (default is System property "benchmark.work.dir" or "work".) -
    - - -
  3. Docs and queries creation:
  4. -
    • analyzer -
    • doc.maker -
    • content.source.forever -
    • html.parser -
    • doc.stored -
    • doc.tokenized -
    • doc.term.vector -
    • doc.term.vector.positions -
    • doc.term.vector.offsets -
    • doc.store.body.bytes -
    • docs.dir -
    • query.maker -
    • file.query.maker.file -
    • file.query.maker.default.field -
    • search.num.hits -
    - - -
  5. Logging: -
    • log.step -
    • log.step.[class name]Task ie log.step.DeleteDoc (e.g. log.step.Wonderful for the WonderfulTask example above). -
    • log.queries -
    • task.max.depth.log -
    -
  6. - -
  7. Index writing: -
    • compound -
    • merge.factor -
    • max.buffered -
    • directory -
    • ram.flush.mb -
    • codec.postingsFormat (e.g. Direct). Note: no codec should be specified through default.codec -
    -
  8. - -
  9. Doc deletion: -
    • doc.delete.step -
    -
  10. - -
  11. Spatial: Numerous; see spatial.alg -
  12. - -
  13. Task alternative packages: -
    • alt.tasks.packages - - comma separated list of additional packages where tasks classes will be looked for - when not found in the default package (that of PerfTask). If the same task class - appears in more than one package, the package indicated first in this list will be used. -
    -
  14. - -
- -

-For sample use of these properties see the *.alg files under conf. -

- - -

Example input algorithm and the result benchmark report

-

-The following example is in conf/sample.alg: -

-# --------------------------------------------------------
-#
-# Sample: what is the effect of doc size on indexing time?
-#
-# There are two parts in this test:
-# - PopulateShort adds 2N documents of length  L
-# - PopulateLong  adds  N documents of length 2L
-# Which one would be faster?
-# The comparison is done twice.
-#
-# --------------------------------------------------------
-
-# -------------------------------------------------------------------------------------
-# multi val params are iterated by NewRound's, added to reports, start with column name.
-merge.factor=mrg:10:20
-max.buffered=buf:100:1000
-compound=true
-
-analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
-directory=FSDirectory
-
-doc.stored=true
-doc.tokenized=true
-doc.term.vector=false
-doc.add.log.step=500
-
-docs.dir=reuters-out
-
-doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
-
-query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
-
-# task at this depth or less would print when they start
-task.max.depth.log=2
-
-log.queries=false
-# -------------------------------------------------------------------------------------
-{
-
-    { "PopulateShort"
-        CreateIndex
-        { AddDoc(4000) > : 20000
-        Optimize
-        CloseIndex
-    >
-
-    ResetSystemErase
-
-    { "PopulateLong"
-        CreateIndex
-        { AddDoc(8000) > : 10000
-        Optimize
-        CloseIndex
-    >
-
-    ResetSystemErase
-
-    NewRound
-
-} : 2
-
-RepSumByName
-RepSelectByPref Populate
-
-
-

- -

-The command line for running this sample: -
ant run-task -Dtask.alg=conf/sample.alg -

- -

-The output report from running this test contains the following: -

-Operation     round mrg  buf   runCnt   recsPerRun        rec/s  elapsedSec    avgUsedMem    avgTotalMem
-PopulateShort     0  10  100        1        20003        119.6      167.26    12,959,120     14,241,792
-PopulateLong -  - 0  10  100 -  -   1 -  -   10003 -  -  - 74.3 -  - 134.57 -  17,085,208 -   20,635,648
-PopulateShort     1  20 1000        1        20003        143.5      139.39    63,982,040     94,756,864
-PopulateLong -  - 1  20 1000 -  -   1 -  -   10003 -  -  - 77.0 -  - 129.92 -  87,309,608 -  100,831,232
-
-

- - -

Results record counting clarified

-

-Two columns in the results table indicate record counts: records-per-run and -records-per-second. What do they mean? -

-Almost every task gets 1 in this count just for being executed. -Task sequences aggregate the counts of their child tasks, -plus their own count of 1. -So, a task sequence containing 5 other task sequences, each running a single -other task 10 times, would have a count of 1 + 5 * (1 + 10) = 56. -

-The traverse and retrieve tasks "count" more: a traverse task -would add 1 for each traversed result (hit), and a retrieve task would -additionally add 1 for each retrieved doc. So, regular Search would -count 1, SearchTrav that traverses 10 hits would count 11, and a -SearchTravRet task that retrieves (and traverses) 10, would count 21. -

-Confusing? This might help: always examine the elapsedSec column, -and always compare "apples to apples", i.e. it is interesting to check how the -rec/s changed for the same task (or sequence) between two -different runs, but it is not very useful to know how the rec/s -differs between Search and SearchTrav tasks. For -the latter, elapsedSec would bring more insight. -

- -
-
 
- - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java new file mode 100644 index 00000000000..d9dfd44ff9b --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Sample performance test written programmatically - no algorithm file is needed here. + */ +package org.apache.lucene.benchmark.byTask.programmatic; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html deleted file mode 100644 index 7221c425645..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Sample performance test written programmatically - no algorithm file is needed here. 
- - \ No newline at end of file diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java new file mode 100644 index 00000000000..2bc0751b405 --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Statistics maintained when running benchmark tasks. + */ +package org.apache.lucene.benchmark.byTask.stats; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html deleted file mode 100644 index fb44623f5e5..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - Statistics maintained when running benchmark tasks. 
- - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java new file mode 100644 index 00000000000..d06f4848a72 --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Extendable benchmark tasks. + */ +package org.apache.lucene.benchmark.byTask.tasks; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html deleted file mode 100644 index 9c17edc3c2c..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - -Extendable benchmark tasks. 
- - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java new file mode 100644 index 00000000000..4066a7a98af --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities used for the benchmark, and for the reports. + */ +package org.apache.lucene.benchmark.byTask.utils; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html deleted file mode 100644 index 6a71c2fd39a..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Utilities used for the benchmark, and for the reports. 
- - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java new file mode 100644 index 00000000000..2df570b12ad --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Lucene Benchmarking Package + *

+ * The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora. + *

+ * ANT will download the corpus automatically, place it in a temp directory and then unpack it to the + * working.dir directory specified in the build. The temp directory and working directory can be safely + * removed after a run. However, the next time the task is run, it will need to download the files again. + *

+ * Classes implementing the Benchmarker interface should have a no-argument constructor if they are to be + * used with the Driver class. The Driver class is provided for convenience only. Feel free to implement + * your own main class for your benchmarker. + *

+ * The StandardBenchmarker is meant to be just that, a standard that runs out of the box with no + * configuration or changes needed. Other benchmarking classes may derive from it to provide alternate + * views or to take in command line options. When reporting benchmarking runs you should state any + * alterations you have made. + *

+ * To run the short version of the StandardBenchmarker, call "ant run-micro-standard". This should take + * a minute or so to complete and give you a preliminary idea of how your change affects the code. + *

+ * To run the long version of the StandardBenchmarker, call "ant run-standard". This takes considerably longer. + *

+ * The original code for these classes was donated by Andrzej Bialecki at http://issues.apache.org/jira/browse/LUCENE-675 + * and has been updated by Grant Ingersoll to make some parts of the code reusable in other benchmarkers + */ +package org.apache.lucene.benchmark; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html deleted file mode 100644 index 2daf1865fd4..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html +++ /dev/null @@ -1,46 +0,0 @@ - - - - - Lucene Benchmarking Package - - -The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora. -

-

- ANT will - download the corpus automatically, place it in a temp directory and then unpack it to the working.dir directory specified in the build. - The temp directory - and working directory can be safely removed after a run. However, the next time the task is run, it will need to download the files again. -

- Classes implementing the Benchmarker interface should have a no-argument constructor if they are to be used with the Driver class. The Driver - class is provided for convenience only. Feel free to implement your own main class for your benchmarker. -

- The StandardBenchmarker is meant to be just that, a standard that runs out of the box with no configuration or changes needed. - Other benchmarking classes may derive from it to provide alternate views or to take in command line options. When reporting benchmarking runs - you should state any alterations you have made. -

- To run the short version of the StandardBenchmarker, call "ant run-micro-standard". This should take a minute or so to complete and give you a preliminary idea of how your change affects the code -

- To run the long version of the StandardBenchmarker, call "ant run-standard". This takes considerably longer. -

- The original code for these classes was donated by Andrzej Bialecki at http://issues.apache.org/jira/browse/LUCENE-675 and has been updated by Grant Ingersoll to make some parts of the code reusable in other benchmarkers -

-
 
- - \ No newline at end of file diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java new file mode 100755 index 00000000000..de19b3c5bec --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Search Quality Benchmarking. + *

+ * This package allows benchmarking the search quality of a Lucene application. + *

+ * In order to use this package you should provide: + *

+ *

+ * For benchmarking TREC collections with TREC QRels, take a look at the + * {@link org.apache.lucene.benchmark.quality.trec trec package}. + *

+ * Here is sample code used to run the TREC 2006 queries 701-850 on the .Gov2 collection: + * + *

+ *     File topicsFile = new File("topics-701-850.txt");
+ *     File qrelsFile = new File("qrels-701-850.txt");
+ *     IndexReader ir = DirectoryReader.open(directory);
+ *     IndexSearcher searcher = new IndexSearcher(ir);
+ * 
+ *     int maxResults = 1000;
+ *     String docNameField = "docname"; 
+ *     
+ *     PrintWriter logger = new PrintWriter(System.out,true); 
+ * 
+ *     // use trec utilities to read trec topics into quality queries
+ *     TrecTopicsReader qReader = new TrecTopicsReader();
+ *     QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+ *     
+ *     // prepare judge, with trec utilities that read from a QRels file
+ *     Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+ *     
+ *     // validate topics & judgments match each other
+ *     judge.validateData(qqs, logger);
+ *     
+ *     // set the parsing of quality queries into Lucene queries.
+ *     QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+ *     
+ *     // run the benchmark
+ *     QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+ *     SubmissionReport submitLog = null;
+ *     QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+ *     
+ *     // print an average sum of the results
+ *     QualityStats avg = QualityStats.average(stats);
+ *     avg.log("SUMMARY",2,logger, "  ");
+ * 
+ * + *

+ * Some immediate ways to modify this program to your needs are: + *

+ */ +package org.apache.lucene.benchmark.quality; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html deleted file mode 100755 index 10afab9c30f..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html +++ /dev/null @@ -1,83 +0,0 @@ - - - - -

Search Quality Benchmarking.

-

-This package allows to benchmark search quality of a Lucene application. -

-In order to use this package you should provide: -

-

-For benchmarking TREC collections with TREC QRels, take a look at the -trec package. -

-Here is a sample code used to run the TREC 2006 queries 701-850 on the .Gov2 collection: - -

-    File topicsFile = new File("topics-701-850.txt");
-    File qrelsFile = new File("qrels-701-850.txt");
-    IndexReader ir = DirectoryReader.open(directory);
-    IndexSearcher searcher = new IndexSearcher(ir);
-
-    int maxResults = 1000;
-    String docNameField = "docname"; 
-    
-    PrintWriter logger = new PrintWriter(System.out,true); 
-
-    // use trec utilities to read trec topics into quality queries
-    TrecTopicsReader qReader = new TrecTopicsReader();
-    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
-    
-    // prepare judge, with trec utilities that read from a QRels file
-    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
-    
-    // validate topics & judgments match each other
-    judge.validateData(qqs, logger);
-    
-    // set the parsing of quality queries into Lucene queries.
-    QualityQueryParser qqParser = new SimpleQQParser("title", "body");
-    
-    // run the benchmark
-    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
-    SubmissionReport submitLog = null;
-    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
-    
-    // print an avarage sum of the results
-    QualityStats avg = QualityStats.average(stats);
-    avg.log("SUMMARY",2,logger, "  ");
-
- -

-Some immediate ways to modify this program to your needs are: -

- - - - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java new file mode 100755 index 00000000000..45ddec80a5c --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs. + */ +package org.apache.lucene.benchmark.quality.trec; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html deleted file mode 100755 index dafccb7fc8a..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - -Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs. 
- - - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java new file mode 100755 index 00000000000..56d721d4dca --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Miscellaneous utilities for search quality benchmarking: query parsing, submission reports. + */ +package org.apache.lucene.benchmark.quality.utils; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html deleted file mode 100755 index 7fde1a86c63..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - -Miscellaneous utilities for search quality benchmarking: query parsing, submission reports. 
- - - diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java new file mode 100644 index 00000000000..956b84fa927 --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Benchmark Utility functions. + */ +package org.apache.lucene.benchmark.utils; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html deleted file mode 100644 index 06cfbee86b4..00000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - -Benchmark Utility functions. - -