diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java
new file mode 100644
index 00000000000..9d58e1d44d1
--- /dev/null
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package-info.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package provides dictionary-driven lemmatization ("accurate stemming")
+ * filter and analyzer for the Polish Language, driven by the
+ * Morfologik library developed
+ * by Dawid Weiss and Marcin Miłkowski.
+ *
+ * The MorfologikFilter yields one or more terms for each token. Each
+ * of those terms is given the same position in the index.
+ *
+ */
+package org.apache.lucene.analysis.morfologik;
+
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html
deleted file mode 100644
index 6b67d0e4c34..00000000000
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/package.html
+++ /dev/null
@@ -1,34 +0,0 @@
-
-
-
-
-
-
-
-
- This package provides dictionary-driven lemmatization ("accurate stemming")
- filter and analyzer for the Polish Language, driven by the
- Morfologik library developed
- by Dawid Weiss and Marcin Miłkowski.
-
-
- The MorfologikFilter yields one or more terms for each token. Each
- of those terms is given the same position in the index.
-
-
-
diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java
new file mode 100644
index 00000000000..c710c9880b1
--- /dev/null
+++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analysis components for phonetic search.
+ */
+package org.apache.lucene.analysis.phonetic;
diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html
deleted file mode 100644
index 4e35a29f644..00000000000
--- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Analysis components for phonetic search.
-
-
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java
new file mode 100644
index 00000000000..37d38b04ed1
--- /dev/null
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * SmartChineseAnalyzer Hidden Markov Model package.
+ * @lucene.experimental
+ */
+package org.apache.lucene.analysis.cn.smart.hhmm;
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
deleted file mode 100644
index e4eeb7fa7e7..00000000000
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-
-
-
-
-
-
-SmartChineseAnalyzer Hidden Markov Model package.
-@lucene.experimental
-
-
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java
new file mode 100644
index 00000000000..d273a363922
--- /dev/null
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package-info.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analyzer for Simplified Chinese, which indexes words.
+ * @lucene.experimental
+ *
+ * Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
+ *
+ * - StandardAnalyzer: Index unigrams (individual Chinese characters) as tokens.
+ * - CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
+ * - SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens.
+ *
+ *
+ * Example phrase: "我是中国人"
+ *
+ * - StandardAnalyzer: 我-是-中-国-人
+ * - CJKAnalyzer: 我是-是中-中国-国人
+ * - SmartChineseAnalyzer: 我-是-中国-人
+ *
+ *
+ */
+package org.apache.lucene.analysis.cn.smart;
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html
deleted file mode 100644
index cc9b8c179f3..00000000000
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-
-
-
-Analyzer for Simplified Chinese, which indexes words.
-@lucene.experimental
-
-Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
-
- - StandardAnalyzer: Index unigrams (individual Chinese characters) as a token.
-
- CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
-
- SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens.
-
-
-Example phrase: "我是中国人"
-
- - StandardAnalyzer: 我-是-中-国-人
- - CJKAnalyzer: 我是-是中-中国-国人
- - SmartChineseAnalyzer: 我-是-中国-人
-
-
-
-
-
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java
new file mode 100644
index 00000000000..6a6cc80acb2
--- /dev/null
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analyzer for Polish.
+ */
+package org.apache.lucene.analysis.pl;
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html
deleted file mode 100644
index 1e7eabcba82..00000000000
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Analyzer for Polish.
-
-
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java
new file mode 100644
index 00000000000..db125cd8d75
--- /dev/null
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Stempel: Algorithmic Stemmer
+ */
+package org.apache.lucene.analysis.stempel;
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html
deleted file mode 100644
index cf143f0c48f..00000000000
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
- Stempel: Algorithmic Stemmer
-
-
diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java
new file mode 100644
index 00000000000..395aa659d13
--- /dev/null
+++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Egothor stemmer API.
+ */
+package org.egothor.stemmer;
diff --git a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html b/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html
deleted file mode 100644
index 4d3a193260c..00000000000
--- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Egothor stemmer API.
-
-
diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java
new file mode 100644
index 00000000000..e08da29bbb6
--- /dev/null
+++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Integration with UIMA's AnalysisEngine.
+ */
+package org.apache.lucene.analysis.uima.ae;
diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html
deleted file mode 100644
index e040bdfb3bf..00000000000
--- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/package.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-Integration with UIMA's AnalysisEngine
-
-
diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java
new file mode 100644
index 00000000000..0baa0fc8530
--- /dev/null
+++ b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Classes that integrate UIMA with Lucene's analysis API.
+ */
+package org.apache.lucene.analysis.uima;
diff --git a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html b/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html
deleted file mode 100644
index 5690c580a83..00000000000
--- a/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/package.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-Classes that integrate UIMA with Lucene's analysis API.
-
-
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html
index 6defdbe769e..fe01fff7b0b 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/package.html
@@ -15,6 +15,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
+
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java
new file mode 100644
index 00000000000..8df0337404f
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Sources for benchmark inputs: documents and queries.
+ */
+package org.apache.lucene.benchmark.byTask.feeds;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html
deleted file mode 100644
index 3feb9e35a41..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-Sources for benchmark inputs: documents and queries.
-
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java
new file mode 100644
index 00000000000..465557aba5b
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package-info.java
@@ -0,0 +1,719 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Benchmarking Lucene By Tasks
+ *
+ * This package provides "task based" performance benchmarking of Lucene.
+ * One can use the predefined benchmarks, or create new ones.
+ *
+ *
+ * Contained packages:
+ *
+ *
+ *
+ *
+ * Package |
+ * Description |
+ *
+ *
+ * stats |
+ * Statistics maintained when running benchmark tasks. |
+ *
+ *
+ * tasks |
+ * Benchmark tasks. |
+ *
+ *
+ * feeds |
+ * Sources for benchmark inputs: documents and queries. |
+ *
+ *
+ * utils |
+ * Utilities used for the benchmark, and for the reports. |
+ *
+ *
+ * programmatic |
+ * Sample performance test written programmatically. |
+ *
+ *
+ *
+ * Table Of Contents
+ *
+ * - Benchmarking By Tasks
+ * - How to use
+ * - Benchmark "algorithm"
+ * - Supported tasks/commands
+ * - Benchmark properties
+ * - Example input algorithm and the result benchmark
+ * report.
+ * - Results record counting clarified
+ *
+ *
+ * Benchmarking By Tasks
+ *
+ * Benchmark Lucene using task primitives.
+ *
+ *
+ *
+ * A benchmark is composed of some predefined tasks, allowing for creating an
+ * index, adding documents,
+ * optimizing, searching, generating reports, and more. A benchmark run takes an
+ * "algorithm" file
+ * that contains a description of the sequence of tasks making up the run, and some
+ * properties defining a few
+ * additional characteristics of the benchmark run.
+ *
+ *
+ *
+ * How to use
+ *
+ * The easiest way to run a benchmark is to use the predefined ant task:
+ *
+ * - ant run-task
+ * - would run the micro-standard.alg "algorithm".
+ *
+ * - ant run-task -Dtask.alg=conf/compound-penalty.alg
+ * - would run the compound-penalty.alg "algorithm".
+ *
+ * - ant run-task -Dtask.alg=[full-path-to-your-alg-file]
+ * - would run your perf test "algorithm".
+ *
+ * - java org.apache.lucene.benchmark.byTask.programmatic.Sample
+ * - would run a performance test programmatically - without using an alg
+ * file. This is less readable, and less convenient, but possible.
+ *
+ *
+ *
+ *
+ * You may find existing tasks sufficient for defining the benchmark you
+ * need, otherwise, you can extend the framework to meet your needs, as explained
+ * herein.
+ *
+ *
+ *
+ * Each benchmark run has a DocMaker and a QueryMaker. These two should usually
+ * match, so that "meaningful" queries are used for a certain collection.
+ * Properties set at the header of the alg file define which "makers" should be
+ * used. You can also specify your own makers, extending DocMaker and implementing
+ * QueryMaker.
+ *
+ * Note: since 2.9, DocMaker is a concrete class which accepts a
+ * ContentSource. In most cases, you can use the DocMaker class to create
+ * Documents, while providing your own ContentSource implementation. For
+ * example, the current Benchmark package includes ContentSource
+ * implementations for TREC, Enwiki and Reuters collections, as well as
+ * others like LineDocSource which reads a 'line' file produced by
+ * WriteLineDocTask.
+ *
+ *
+ *
+ * Benchmark .alg file contains the benchmark "algorithm". The syntax is described
+ * below. Within the algorithm, you can specify groups of commands, assign them
+ * names, specify commands that should be repeated,
+ * do commands in serial or in parallel,
+ * and also control the speed of "firing" the commands.
+ *
+ *
+ *
+ * This allows, for instance, to specify
+ * that an index should be opened for update,
+ * documents should be added to it one by one but not faster than 20 docs a minute,
+ * and, in parallel with this,
+ * some N queries should be searched against that index,
+ * again, no more than 2 queries a second.
+ * You can have the searches all share an index reader,
+ * or have them each open its own reader and close it afterwards.
+ *
+ *
+ *
+ * If the commands available for use in the algorithm do not meet your needs,
+ * you can add commands by adding a new task under
+ * org.apache.lucene.benchmark.byTask.tasks -
+ * you should extend the PerfTask abstract class.
+ * Make sure that your new task class name is suffixed by Task.
+ * Assume you added the class "WonderfulTask" - doing so also enables the
+ * command "Wonderful" to be used in the algorithm.
+ *
+ *
+ *
+ * External classes: It is sometimes useful to invoke the benchmark
+ * package with your external alg file that configures the use of your own
+ * doc/query maker and/or html parser. You can work this out without
+ * modifying the benchmark package code, by passing your class path
+ * with the benchmark.ext.classpath property:
+ *
+ * - ant run-task -Dtask.alg=[full-path-to-your-alg-file]
+ * -Dbenchmark.ext.classpath=/mydir/classes
+ * -Dtask.mem=512M
+ *
+ *
+ * External tasks: When writing your own tasks under a package other than
+ * org.apache.lucene.benchmark.byTask.tasks specify that package through the
+ * alt.tasks.packages property.
+ *
+ *
+ * Benchmark "algorithm"
+ *
+ *
+ * The following is an informal description of the supported syntax.
+ *
+ *
+ *
+ * -
+ * Measuring: When a command is executed, statistics for the elapsed
+ * execution time and memory consumption are collected.
+ * At any time, those statistics can be printed, using one of the
+ * available ReportTasks.
+ *
+ * -
+ * Comments start with '#'.
+ *
+ * -
+ * Serial sequences are enclosed within '{ }'.
+ *
+ * -
+ * Parallel sequences are enclosed within
+ * '[ ]'
+ *
+ * -
+ * Sequence naming: To name a sequence, put
+ * '"name"' just after
+ * '{' or '['.
+ *
Example - { "ManyAdds" AddDoc } : 1000000 -
+ * would
+ * name the sequence of 1M add docs "ManyAdds", and this name would later appear
+ * in statistic reports.
+ * If you don't specify a name for a sequence, it is given one: you can see it as
+ * the algorithm is printed just before benchmark execution starts.
+ *
+ * -
+ * Repeating:
+ * To repeat sequence tasks N times, add ': N' just
+ * after the
+ * sequence closing tag - '}' or
+ * ']' or '>'.
+ *
Example - [ AddDoc ] : 4 - would do 4 addDoc
+ * in parallel, spawning 4 threads at once.
+ *
Example - [ AddDoc AddDoc ] : 4 - would do
+ * 8 addDoc in parallel, spawning 8 threads at once.
+ *
Example - { AddDoc } : 30 - would do addDoc
+ * 30 times in a row.
+ *
Example - { AddDoc AddDoc } : 30 - would do
+ * addDoc 60 times in a row.
+ *
Exhaustive repeating: use * instead of
+ * a number to repeat exhaustively.
+ * This is sometimes useful, for adding as many files as a doc maker can create,
+ * without iterating over the same file again, especially when the exact
+ * number of documents is not known in advance. For instance, TREC files extracted
+ * from a zip file. Note: when using this, you must also set
+ * content.source.forever to false.
+ *
Example - { AddDoc } : * - would add docs
+ * until the doc maker is "exhausted".
+ *
+ * -
+ * Command parameter: a command can optionally take a single parameter.
+ * If a certain command does not support a parameter, or if the parameter is of
+ * the wrong type,
+ * reading the algorithm will fail with an exception and the test would not start.
+ * Currently the following tasks take optional parameters:
+ *
+ * - AddDoc takes a numeric parameter, indicating the required size of
+ * added document. Note: if the DocMaker implementation used in the test
+ * does not support makeDoc(size), an exception would be thrown and the test
+ * would fail.
+ *
+ * - DeleteDoc takes numeric parameter, indicating the docid to be
+ * deleted. The latter is not very useful for loops, since the docid is
+ * fixed, so for deletion in loops it is better to use the
+ *
doc.delete.step
property.
+ *
+ * - SetProp takes a
name,value
mandatory param,
+ * ',' used as a separator.
+ *
+ * - SearchTravRetTask and SearchTravTask take a numeric
+ * parameter, indicating the required traversal size.
+ *
+ * - SearchTravRetLoadFieldSelectorTask takes a string
+ * parameter: a comma separated list of Fields to load.
+ *
+ * - SearchTravRetHighlighterTask takes a string
+ * parameter: a comma separated list of parameters to define highlighting. See that
+ * task's javadocs for more information.
+ *
+ *
+ *
Example - AddDoc(2000) - would add a document
+ * of size 2000 (~bytes).
+ *
See conf/task-sample.alg for how this can be used, for instance, to check
+ * which is faster, adding
+ * many smaller documents, or few larger documents.
+ * Next candidates for supporting a parameter may be the Search tasks,
+ * for controlling the query size.
+ *
+ * -
+ * Statistic recording elimination: - a sequence can also end with
+ * '>',
+ * in which case child tasks would not store their statistics.
+ * This can be useful to avoid exploding stats data, for adding say 1M docs.
+ *
Example - { "ManyAdds" AddDoc > : 1000000 -
+ * would add million docs, measure that total, but not save stats for each addDoc.
+ * Notice that the granularity of System.currentTimeMillis() (which is used
+ * here) is system dependent,
+ * and in some systems an operation that takes 5 ms to complete may show 0 ms
+ * latency time in performance measurements.
+ * Therefore it is sometimes more accurate to look at the elapsed time of a larger
+ * sequence, as demonstrated here.
+ *
+ * -
+ * Rate:
+ * To set a rate (ops/sec or ops/min) for a sequence, add
+ * ': N : R' just after sequence closing tag.
+ * This would specify repetition of N with rate of R operations/sec.
+ * Use 'R/sec' or
+ * 'R/min'
+ * to explicitly specify that the rate is per second or per minute.
+ * The default is per second.
+ *
Example - [ AddDoc ] : 400 : 3 - would do 400
+ * addDoc in parallel, starting up to 3 threads per second.
+ *
Example - { AddDoc } : 100 : 200/min - would
+ * do 100 addDoc serially,
+ * waiting before starting next add, if otherwise rate would exceed 200 adds/min.
+ *
+ * -
+ * Disable Counting: Each task executed contributes to the records count.
+ * This count is reflected in reports under recs/s and under recsPerRun.
+ * Most tasks count 1, some count 0, and some count more.
+ * (See Results record counting clarified for more details.)
+ * It is possible to disable counting for a task by preceding it with -.
+ *
Example - -CreateIndex - would count 0 while
+ * the default behavior for CreateIndex is to count 1.
+ *
+ * -
+ * Command names: Each class "AnyNameTask" in the
+ * package org.apache.lucene.benchmark.byTask.tasks,
+ * that extends PerfTask, is supported as command "AnyName" that can be
+ * used in the benchmark "algorithm" description.
+ * This allows to add new commands by just adding such classes.
+ *
+ *
+ *
+ *
+ *
+ * Supported tasks/commands
+ *
+ *
+ * Existing tasks can be divided into a few groups:
+ * regular index/search work tasks, report tasks, and control tasks.
+ *
+ *
+ *
+ *
+ * -
+ * Report tasks: There are a few Report commands for generating reports.
+ * Only task runs that were completed are reported.
+ * (The 'Report tasks' themselves are not measured and not reported.)
+ *
+ * -
+ * RepAll - all (completed) task runs.
+ *
+ * -
+ * RepSumByName - all statistics,
+ * aggregated by name. So, if AddDoc was executed 2000 times,
+ * only 1 report line would be created for it, aggregating all those
+ * 2000 statistic records.
+ *
+ * -
+ * RepSelectByPref prefixWord - all
+ * records for tasks whose name starts with
+ * prefixWord.
+ *
+ * -
+ * RepSumByPref prefixWord - all
+ * records for tasks whose name starts with
+ * prefixWord,
+ * aggregated by their full task name.
+ *
+ * -
+ * RepSumByNameRound - all statistics,
+ * aggregated by name and by Round.
+ * So, if AddDoc was executed 2000 times in each of 3
+ * rounds, 3 report lines would be
+ * created for it,
+ * aggregating all those 2000 statistic records in each round.
+ * See more about rounds in the NewRound
+ * command description below.
+ *
+ * -
+ * RepSumByPrefRound prefixWord -
+ * similar to RepSumByNameRound,
+ * just that only tasks whose name starts with
+ * prefixWord are included.
+ *
+ *
+ * If needed, additional reports can be added by extending the abstract class
+ * ReportTask, and by
+ * manipulating the statistics data in Points and TaskStats.
+ *
+ *
+ * - Control tasks: A few of the tasks control the benchmark algorithm
+ * as a whole:
+ *
+ * -
+ * ClearStats - clears the entire statistics.
+ * Further reports would only include task runs that would start after this
+ * call.
+ *
+ * -
+ * NewRound - virtually start a new round of
+ * performance test.
+ * Although this command can be placed anywhere, it mostly makes sense at
+ * the end of an outermost sequence.
+ *
This increments a global "round counter". All task runs that
+ * would start now would
+ * record the new, updated round counter as their round number.
+ * This would appear in reports.
+ * In particular, see RepSumByNameRound above.
+ *
An additional effect of NewRound, is that numeric and boolean
+ * properties defined (at the head
+ * of the .alg file) as a sequence of values, e.g.
+ * merge.factor=mrg:10:100:10:100 would
+ * increment (cyclic) to the next value.
+ * Note: this would also be reflected in the reports, in this case under a
+ * column that would be named "mrg".
+ *
+ * -
+ * ResetInputs - DocMaker and the
+ * various QueryMakers
+ * would reset their counters to start.
+ * The way these Maker interfaces work, each call for makeDocument()
+ * or makeQuery() creates the next document or query
+ * that it "knows" to create.
+ * If that pool is "exhausted", the "maker" starts over again.
+ * The ResetInputs command
+ * therefore allows making the rounds comparable.
+ * It is therefore useful to invoke ResetInputs together with NewRound.
+ *
+ * -
+ * ResetSystemErase - reset all index
+ * and input data and call gc.
+ * Does NOT reset statistics. This contains ResetInputs.
+ * All writers/readers are nullified, deleted, closed.
+ * Index is erased.
+ * Directory is erased.
+ * You would have to call CreateIndex once this was called...
+ *
+ * -
+ * ResetSystemSoft - reset all
+ * index and input data and call gc.
+ * Does NOT reset statistics. This contains ResetInputs.
+ * All writers/readers are nullified, closed.
+ * Index is NOT erased.
+ * Directory is NOT erased.
+ * This is useful for testing performance on an existing index,
+ * for instance if the construction of a large index
+ * took a very long time and now you would like to test
+ * its search or update performance.
+ *
+ *
+ *
+ *
+ * -
+ * Other existing tasks are quite straightforward and would
+ * just be briefly described here.
+ *
+ * -
+ * CreateIndex and
+ * OpenIndex both leave the
+ * index open for later update operations.
+ * CloseIndex would close it.
+ *
-
+ * OpenReader, similarly, would
+ * leave an index reader open for later search operations.
+ * But this has further semantics.
+ * If a Read operation is performed, and an open reader exists,
+ * it would be used.
+ * Otherwise, the read operation would open its own reader
+ * and close it when the read operation is done.
+ * This allows testing various scenarios - sharing a reader,
+ * searching with "cold" reader, with "warmed" reader, etc.
+ * The read operations affected by this are:
+ * Warm,
+ * Search,
+ * SearchTrav (search and traverse),
+ * and SearchTravRet (search
+ * and traverse and retrieve).
+ * Notice that each of the 3 search task types maintains
+ * its own queryMaker instance.
+ *
-
+ * CommitIndex and
+ * ForceMerge can be used to commit
+ * changes to the index then merge the index segments. The integer
+ * parameter specifies how many segments to merge down to (default
+ * 1).
+ *
-
+ * WriteLineDoc prepares a 'line'
+ * file where each line holds a document with title,
+ * date and body elements, separated by [TAB].
+ * A line file is useful if one wants to measure pure indexing
+ * performance, without the overhead of parsing the data.
+ * You can use LineDocSource as a ContentSource over a 'line'
+ * file.
+ * -
+ * ConsumeContentSource consumes
+ * a ContentSource. Useful for e.g. testing a ContentSource
+ * performance, without the overhead of preparing a Document
+ * out of it.
+ *
+ *
+ *
+ *
+ *
+ * Benchmark properties
+ *
+ *
+ * Properties are read from the header of the .alg file, and
+ * define several parameters of the performance test.
+ * As mentioned above for the NewRound task,
+ * numeric and boolean properties that are defined as a sequence
+ * of values, e.g. merge.factor=mrg:10:100:10:100
+ * would increment (cyclic) to the next value,
+ * when NewRound is called, and would also
+ * appear as a named column in the reports (column
+ * name would be "mrg" in this example).
+ *
+ *
+ *
+ * Some of the currently defined properties are:
+ *
+ *
+ *
+ * -
+ * analyzer - full
+ * class name for the analyzer to use.
+ * Same analyzer would be used in the entire test.
+ *
+ *
+ * -
+ * directory - valid values are
+ * This tells which directory to use for the performance test.
+ *
+ *
+ * -
+ * Index work parameters:
+ * Multi int/boolean values would be iterated with calls to NewRound.
+ * They would also be added as columns in the reports; the first string in the
+ * sequence is the column name.
+ * (Make sure it is no shorter than any value in the sequence).
+ *
+ * - max.buffered
+ *
Example: max.buffered=buf:10:10:100:100 -
+ * this would define using maxBufferedDocs of 10 in iterations 0 and 1,
+ * and 100 in iterations 2 and 3.
+ *
+ * -
+ * merge.factor - which
+ * merge factor to use.
+ *
+ * -
+ * compound - whether the index is
+ * using the compound format or not. Valid values are "true" and "false".
+ *
+ *
+ *
+ *
+ *
+ * Here is a list of currently defined properties:
+ *
+ *
+ *
+ * - Root directory for data and indexes:
+ *
- work.dir (default is System property "benchmark.work.dir" or "work".)
+ *
+ *
+ *
+ * - Docs and queries creation:
+ *
- analyzer
+ *
- doc.maker
+ *
- content.source.forever
+ *
- html.parser
+ *
- doc.stored
+ *
- doc.tokenized
+ *
- doc.term.vector
+ *
- doc.term.vector.positions
+ *
- doc.term.vector.offsets
+ *
- doc.store.body.bytes
+ *
- docs.dir
+ *
- query.maker
+ *
- file.query.maker.file
+ *
- file.query.maker.default.field
+ *
- search.num.hits
+ *
+ *
+ *
+ * - Logging:
+ *
- log.step
+ *
- log.step.[class name]Task i.e. log.step.DeleteDoc (e.g. log.step.Wonderful for the WonderfulTask example above).
+ *
- log.queries
+ *
- task.max.depth.log
+ *
+ *
+ *
+ * - Index writing:
+ *
- compound
+ *
- merge.factor
+ *
- max.buffered
+ *
- directory
+ *
- ram.flush.mb
+ *
- codec.postingsFormat (e.g. Direct) Note: no codec should be specified through default.codec
+ *
+ *
+ *
+ * - Doc deletion:
+ *
+ *
+ *
+ * - Spatial: Numerous; see spatial.alg
+ *
+ *
+ * - Task alternative packages:
+ *
- alt.tasks.packages
+ * - comma separated list of additional packages where task classes will be looked for
+ * when not found in the default package (that of PerfTask). If the same task class
+ * appears in more than one package, the package indicated first in this list will be used.
+ *
+ *
+ *
+ *
+ *
+ *
+ * For sample use of these properties see the *.alg files under conf.
+ *
+ *
+ *
+ * Example input algorithm and the result benchmark report
+ *
+ * The following example is in conf/sample.alg:
+ *
+ * # --------------------------------------------------------
+ * #
+ * # Sample: what is the effect of doc size on indexing time?
+ * #
+ * # There are two parts in this test:
+ * # - PopulateShort adds 2N documents of length L
+ * # - PopulateLong adds N documents of length 2L
+ * # Which one would be faster?
+ * # The comparison is done twice.
+ * #
+ * # --------------------------------------------------------
+ *
+ * # -------------------------------------------------------------------------------------
+ * # multi val params are iterated by NewRound's, added to reports, start with column name.
+ * merge.factor=mrg:10:20
+ * max.buffered=buf:100:1000
+ * compound=true
+ *
+ * analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+ * directory=FSDirectory
+ *
+ * doc.stored=true
+ * doc.tokenized=true
+ * doc.term.vector=false
+ * doc.add.log.step=500
+ *
+ * docs.dir=reuters-out
+ *
+ * doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+ *
+ * query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+ *
+ * # task at this depth or less would print when they start
+ * task.max.depth.log=2
+ *
+ * log.queries=false
+ * # -------------------------------------------------------------------------------------
+ * {
+ *
+ * { "PopulateShort"
+ * CreateIndex
+ * { AddDoc(4000) > : 20000
+ * Optimize
+ * CloseIndex
+ * >
+ *
+ * ResetSystemErase
+ *
+ * { "PopulateLong"
+ * CreateIndex
+ * { AddDoc(8000) > : 10000
+ * Optimize
+ * CloseIndex
+ * >
+ *
+ * ResetSystemErase
+ *
+ * NewRound
+ *
+ * } : 2
+ *
+ * RepSumByName
+ * RepSelectByPref Populate
+ *
+ *
+ *
+ *
+ * The command line for running this sample:
+ *
ant run-task -Dtask.alg=conf/sample.alg
+ *
+ *
+ *
+ * The output report from running this test contains the following:
+ *
+ * Operation round mrg buf runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem
+ * PopulateShort 0 10 100 1 20003 119.6 167.26 12,959,120 14,241,792
+ * PopulateLong - - 0 10 100 - - 1 - - 10003 - - - 74.3 - - 134.57 - 17,085,208 - 20,635,648
+ * PopulateShort 1 20 1000 1 20003 143.5 139.39 63,982,040 94,756,864
+ * PopulateLong - - 1 20 1000 - - 1 - - 10003 - - - 77.0 - - 129.92 - 87,309,608 - 100,831,232
+ *
+ *
+ *
+ * Results record counting clarified
+ *
+ * Two columns in the results table indicate record counts: records-per-run and
+ * records-per-second. What do they mean?
+ *
+ * Almost every task gets 1 in this count just for being executed.
+ * Task sequences aggregate the counts of their child tasks,
+ * plus their own count of 1.
+ * So, a task sequence containing 5 other task sequences, each running a single
+ * other task 10 times, would have a count of 1 + 5 * (1 + 10) = 56.
+ *
+ * The traverse and retrieve tasks "count" more: a traverse task
+ * would add 1 for each traversed result (hit), and a retrieve task would
+ * additionally add 1 for each retrieved doc. So, regular Search would
+ * count 1, SearchTrav that traverses 10 hits would count 11, and a
+ * SearchTravRet task that retrieves (and traverses) 10, would count 21.
+ *
+ * Confusing? This might help: always examine the elapsedSec
column,
+ * and always compare "apples to apples", i.e. it is interesting to check how the
+ * rec/s
changed for the same task (or sequence) between two
+ * different runs, but it is not very useful to know how the rec/s
+ * differs between Search
and SearchTrav
tasks. For
+ * the latter, elapsedSec
would bring more insight.
+ *
+ */
+package org.apache.lucene.benchmark.byTask;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
deleted file mode 100644
index 7c0967180cb..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
+++ /dev/null
@@ -1,733 +0,0 @@
-
-
-
-
- Benchmarking Lucene By Tasks
-
-
-Benchmarking Lucene By Tasks.
-
-
-This package provides "task based" performance benchmarking of Lucene.
-One can use the predefined benchmarks, or create new ones.
-
-
-Contained packages:
-
-
-
-
- Package |
- Description |
-
-
- stats |
- Statistics maintained when running benchmark tasks. |
-
-
- tasks |
- Benchmark tasks. |
-
-
- feeds |
- Sources for benchmark inputs: documents and queries. |
-
-
- utils |
- Utilities used for the benchmark, and for the reports. |
-
-
- programmatic |
- Sample performance test written programmatically. |
-
-
-
-
Table Of Contents
-
-
- - Benchmarking By Tasks
- - How to use
- - Benchmark "algorithm"
- - Supported tasks/commands
- - Benchmark properties
- - Example input algorithm and the result benchmark
- report.
- - Results record counting clarified
-
-
-
-
Benchmarking By Tasks
-
-Benchmark Lucene using task primitives.
-
-
-
-A benchmark is composed of some predefined tasks, allowing for creating an
-index, adding documents,
-optimizing, searching, generating reports, and more. A benchmark run takes an
-"algorithm" file
-that contains a description of the sequence of tasks making up the run, and some
-properties defining a few
-additional characteristics of the benchmark run.
-
-
-
-
How to use
-
-Easiest way to run a benchmarks is using the predefined ant task:
-
- - ant run-task
-
- would run the micro-standard.alg
"algorithm".
-
- - ant run-task -Dtask.alg=conf/compound-penalty.alg
-
- would run the compound-penalty.alg
"algorithm".
-
- - ant run-task -Dtask.alg=[full-path-to-your-alg-file]
-
- would run your perf test
"algorithm".
-
- - java org.apache.lucene.benchmark.byTask.programmatic.Sample
-
- would run a performance test programmatically - without using an alg
- file. This is less readable, and less convenient, but possible.
-
-
-
-
-
-You may find existing tasks sufficient for defining the benchmark you
-need, otherwise, you can extend the framework to meet your needs, as explained
-herein.
-
-
-
-Each benchmark run has a DocMaker and a QueryMaker. These two should usually
-match, so that "meaningful" queries are used for a certain collection.
-Properties set at the header of the alg file define which "makers" should be
-used. You can also specify your own makers, extending DocMaker and implementing
-QueryMaker.
-
- Note: since 2.9, DocMaker is a concrete class which accepts a
- ContentSource. In most cases, you can use the DocMaker class to create
- Documents, while providing your own ContentSource implementation. For
- example, the current Benchmark package includes ContentSource
- implementations for TREC, Enwiki and Reuters collections, as well as
- others like LineDocSource which reads a 'line' file produced by
- WriteLineDocTask.
-
-
-
-
-Benchmark .alg file contains the benchmark "algorithm". The syntax is described
-below. Within the algorithm, you can specify groups of commands, assign them
-names, specify commands that should be repeated,
-do commands in serial or in parallel,
-and also control the speed of "firing" the commands.
-
-
-
-This allows, for instance, to specify
-that an index should be opened for update,
-documents should be added to it one by one but not faster than 20 docs a minute,
-and, in parallel with this,
-some N queries should be searched against that index,
-again, no more than 2 queries a second.
-You can have the searches all share an index reader,
-or have them each open its own reader and close it afterwords.
-
-
-
-If the commands available for use in the algorithm do not meet your needs,
-you can add commands by adding a new task under
-org.apache.lucene.benchmark.byTask.tasks -
-you should extend the PerfTask abstract class.
-Make sure that your new task class name is suffixed by Task.
-Assume you added the class "WonderfulTask" - doing so also enables the
-command "Wonderful" to be used in the algorithm.
-
-
-
-External classes: It is sometimes useful to invoke the benchmark
-package with your external alg file that configures the use of your own
-doc/query maker and or html parser. You can work this out without
-modifying the benchmark package code, by passing your class path
-with the benchmark.ext.classpath property:
-
- - ant run-task -Dtask.alg=[full-path-to-your-alg-file]
- -Dbenchmark.ext.classpath=/mydir/classes
- -Dtask.mem=512M
-
-
External tasks: When writing your own tasks under a package other than
-
org.apache.lucene.benchmark.byTask.tasks specify that package thru the
-
alt.tasks.packages property.
-
-
-
-
Benchmark "algorithm"
-
-
-The following is an informal description of the supported syntax.
-
-
-
- -
- Measuring: When a command is executed, statistics for the elapsed
- execution time and memory consumption are collected.
- At any time, those statistics can be printed, using one of the
- available ReportTasks.
-
- -
- Comments start with '#'.
-
- -
- Serial sequences are enclosed within '{ }'.
-
- -
- Parallel sequences are enclosed within
- '[ ]'
-
- -
- Sequence naming: To name a sequence, put
- '"name"' just after
- '{' or '['.
-
Example - { "ManyAdds" AddDoc } : 1000000 -
- would
- name the sequence of 1M add docs "ManyAdds", and this name would later appear
- in statistic reports.
- If you don't specify a name for a sequence, it is given one: you can see it as
- the algorithm is printed just before benchmark execution starts.
-
- -
- Repeating:
- To repeat sequence tasks N times, add ': N' just
- after the
- sequence closing tag - '}' or
- ']' or '>'.
-
Example - [ AddDoc ] : 4 - would do 4 addDoc
- in parallel, spawning 4 threads at once.
-
Example - [ AddDoc AddDoc ] : 4 - would do
- 8 addDoc in parallel, spawning 8 threads at once.
-
Example - { AddDoc } : 30 - would do addDoc
- 30 times in a row.
-
Example - { AddDoc AddDoc } : 30 - would do
- addDoc 60 times in a row.
-
Exhaustive repeating: use * instead of
- a number to repeat exhaustively.
- This is sometimes useful, for adding as many files as a doc maker can create,
- without iterating over the same file again, especially when the exact
- number of documents is not known in advance. For instance, TREC files extracted
- from a zip file. Note: when using this, you must also set
- content.source.forever to false.
-
Example - { AddDoc } : * - would add docs
- until the doc maker is "exhausted".
-
- -
- Command parameter: a command can optionally take a single parameter.
- If the certain command does not support a parameter, or if the parameter is of
- the wrong type,
- reading the algorithm will fail with an exception and the test would not start.
- Currently the following tasks take optional parameters:
-
- - AddDoc takes a numeric parameter, indicating the required size of
- added document. Note: if the DocMaker implementation used in the test
- does not support makeDoc(size), an exception would be thrown and the test
- would fail.
-
- - DeleteDoc takes numeric parameter, indicating the docid to be
- deleted. The latter is not very useful for loops, since the docid is
- fixed, so for deletion in loops it is better to use the
-
doc.delete.step
property.
-
- - SetProp takes a
name,value
mandatory param,
- ',' used as a separator.
-
- - SearchTravRetTask and SearchTravTask take a numeric
- parameter, indicating the required traversal size.
-
- - SearchTravRetLoadFieldSelectorTask takes a string
- parameter: a comma separated list of Fields to load.
-
- - SearchTravRetHighlighterTask takes a string
- parameter: a comma separated list of parameters to define highlighting. See that
- tasks javadocs for more information
-
-
-
Example - AddDoc(2000) - would add a document
- of size 2000 (~bytes).
-
See conf/task-sample.alg for how this can be used, for instance, to check
- which is faster, adding
- many smaller documents, or few larger documents.
- Next candidates for supporting a parameter may be the Search tasks,
- for controlling the query size.
-
- -
- Statistic recording elimination: - a sequence can also end with
- '>',
- in which case child tasks would not store their statistics.
- This can be useful to avoid exploding stats data, for adding say 1M docs.
-
Example - { "ManyAdds" AddDoc > : 1000000 -
- would add million docs, measure that total, but not save stats for each addDoc.
-
Notice that the granularity of System.currentTimeMillis() (which is used
- here) is system dependant,
- and in some systems an operation that takes 5 ms to complete may show 0 ms
- latency time in performance measurements.
- Therefore it is sometimes more accurate to look at the elapsed time of a larger
- sequence, as demonstrated here.
-
- -
- Rate:
- To set a rate (ops/sec or ops/min) for a sequence, add
- ': N : R' just after sequence closing tag.
- This would specify repetition of N with rate of R operations/sec.
- Use 'R/sec' or
- 'R/min'
- to explicitly specify that the rate is per second or per minute.
- The default is per second,
-
Example - [ AddDoc ] : 400 : 3 - would do 400
- addDoc in parallel, starting up to 3 threads per second.
-
Example - { AddDoc } : 100 : 200/min - would
- do 100 addDoc serially,
- waiting before starting next add, if otherwise rate would exceed 200 adds/min.
-
- -
- Disable Counting: Each task executed contributes to the records count.
- This count is reflected in reports under recs/s and under recsPerRun.
- Most tasks count 1, some count 0, and some count more.
- (See Results record counting clarified for more details.)
- It is possible to disable counting for a task by preceding it with -.
-
Example - -CreateIndex - would count 0 while
- the default behavior for CreateIndex is to count 1.
-
- -
- Command names: Each class "AnyNameTask" in the
- package org.apache.lucene.benchmark.byTask.tasks,
- that extends PerfTask, is supported as command "AnyName" that can be
- used in the benchmark "algorithm" description.
- This allows to add new commands by just adding such classes.
-
-
-
-
-
-
Supported tasks/commands
-
-
-Existing tasks can be divided into a few groups:
-regular index/search work tasks, report tasks, and control tasks.
-
-
-
-
- -
- Report tasks: There are a few Report commands for generating reports.
- Only task runs that were completed are reported.
- (The 'Report tasks' themselves are not measured and not reported.)
-
- -
- RepAll - all (completed) task runs.
-
- -
- RepSumByName - all statistics,
- aggregated by name. So, if AddDoc was executed 2000 times,
- only 1 report line would be created for it, aggregating all those
- 2000 statistic records.
-
- -
- RepSelectByPref prefixWord - all
- records for tasks whose name start with
- prefixWord.
-
- -
- RepSumByPref prefixWord - all
- records for tasks whose name start with
- prefixWord,
- aggregated by their full task name.
-
- -
- RepSumByNameRound - all statistics,
- aggregated by name and by Round.
- So, if AddDoc was executed 2000 times in each of 3
- rounds, 3 report lines would be
- created for it,
- aggregating all those 2000 statistic records in each round.
- See more about rounds in the NewRound
- command description below.
-
- -
- RepSumByPrefRound prefixWord -
- similar to RepSumByNameRound,
- just that only tasks whose name starts with
- prefixWord are included.
-
-
- If needed, additional reports can be added by extending the abstract class
- ReportTask, and by
- manipulating the statistics data in Points and TaskStats.
-
-
- - Control tasks: Few of the tasks control the benchmark algorithm
- all over:
-
- -
- ClearStats - clears the entire statistics.
- Further reports would only include task runs that would start after this
- call.
-
- -
- NewRound - virtually start a new round of
- performance test.
- Although this command can be placed anywhere, it mostly makes sense at
- the end of an outermost sequence.
-
This increments a global "round counter". All task runs that
- would start now would
- record the new, updated round counter as their round number.
- This would appear in reports.
- In particular, see RepSumByNameRound above.
-
An additional effect of NewRound, is that numeric and boolean
- properties defined (at the head
- of the .alg file) as a sequence of values, e.g.
- merge.factor=mrg:10:100:10:100 would
- increment (cyclic) to the next value.
- Note: this would also be reflected in the reports, in this case under a
- column that would be named "mrg".
-
- -
- ResetInputs - DocMaker and the
- various QueryMakers
- would reset their counters to start.
- The way these Maker interfaces work, each call for makeDocument()
- or makeQuery() creates the next document or query
- that it "knows" to create.
- If that pool is "exhausted", the "maker" start over again.
- The ResetInputs command
- therefore allows to make the rounds comparable.
- It is therefore useful to invoke ResetInputs together with NewRound.
-
- -
- ResetSystemErase - reset all index
- and input data and call gc.
- Does NOT reset statistics. This contains ResetInputs.
- All writers/readers are nullified, deleted, closed.
- Index is erased.
- Directory is erased.
- You would have to call CreateIndex once this was called...
-
- -
- ResetSystemSoft - reset all
- index and input data and call gc.
- Does NOT reset statistics. This contains ResetInputs.
- All writers/readers are nullified, closed.
- Index is NOT erased.
- Directory is NOT erased.
- This is useful for testing performance on an existing index,
- for instance if the construction of a large index
- took a very long time and now you would to test
- its search or update performance.
-
-
-
-
- -
- Other existing tasks are quite straightforward and would
- just be briefly described here.
-
- -
- CreateIndex and
- OpenIndex both leave the
- index open for later update operations.
- CloseIndex would close it.
-
-
- OpenReader, similarly, would
- leave an index reader open for later search operations.
- But this have further semantics.
- If a Read operation is performed, and an open reader exists,
- it would be used.
- Otherwise, the read operation would open its own reader
- and close it when the read operation is done.
- This allows testing various scenarios - sharing a reader,
- searching with "cold" reader, with "warmed" reader, etc.
- The read operations affected by this are:
- Warm,
- Search,
- SearchTrav (search and traverse),
- and SearchTravRet (search
- and traverse and retrieve).
- Notice that each of the 3 search task types maintains
- its own queryMaker instance.
-
-
- CommitIndex and
- ForceMerge can be used to commit
- changes to the index then merge the index segments. The integer
- parameter specifies how many segments to merge down to (default
- 1).
-
-
- WriteLineDoc prepares a 'line'
- file where each line holds a document with title,
- date and body elements, separated by [TAB].
- A line file is useful if one wants to measure pure indexing
- performance, without the overhead of parsing the data.
- You can use LineDocSource as a ContentSource over a 'line'
- file.
- -
- ConsumeContentSource consumes
- a ContentSource. Useful for e.g. testing a ContentSource
- performance, without the overhead of preparing a Document
- out of it.
-
-
-
-
-
-
Benchmark properties
-
-
-Properties are read from the header of the .alg file, and
-define several parameters of the performance test.
-As mentioned above for the NewRound task,
-numeric and boolean properties that are defined as a sequence
-of values, e.g. merge.factor=mrg:10:100:10:100
-would increment (cyclic) to the next value,
-when NewRound is called, and would also
-appear as a named column in the reports (column
-name would be "mrg" in this example).
-
-
-
-Some of the currently defined properties are:
-
-
-
- -
- analyzer - full
- class name for the analyzer to use.
- Same analyzer would be used in the entire test.
-
-
- -
- directory - valid values are
- This tells which directory to use for the performance test.
-
-
- -
- Index work parameters:
- Multi int/boolean values would be iterated with calls to NewRound.
- There would be also added as columns in the reports, first string in the
- sequence is the column name.
- (Make sure it is no shorter than any value in the sequence).
-
- - max.buffered
-
Example: max.buffered=buf:10:10:100:100 -
- this would define using maxBufferedDocs of 10 in iterations 0 and 1,
- and 100 in iterations 2 and 3.
-
- -
- merge.factor - which
- merge factor to use.
-
- -
- compound - whether the index is
- using the compound format or not. Valid values are "true" and "false".
-
-
-
-
-
-Here is a list of currently defined properties:
-
-
-
- - Root directory for data and indexes:
- - work.dir (default is System property "benchmark.work.dir" or "work".)
-
-
-
- - Docs and queries creation:
- - analyzer
-
- doc.maker
-
- content.source.forever
-
- html.parser
-
- doc.stored
-
- doc.tokenized
-
- doc.term.vector
-
- doc.term.vector.positions
-
- doc.term.vector.offsets
-
- doc.store.body.bytes
-
- docs.dir
-
- query.maker
-
- file.query.maker.file
-
- file.query.maker.default.field
-
- search.num.hits
-
-
-
- - Logging:
-
- log.step
-
- log.step.[class name]Task ie log.step.DeleteDoc (e.g. log.step.Wonderful for the WonderfulTask example above).
-
- log.queries
-
- task.max.depth.log
-
-
-
- - Index writing:
-
- compound
-
- merge.factor
-
- max.buffered
-
- directory
-
- ram.flush.mb
-
- codec.postingsFormat (eg Direct) Note: no codec should be specified through default.codec
-
-
-
- - Doc deletion:
-
-
-
- - Spatial: Numerous; see spatial.alg
-
-
- - Task alternative packages:
-
- alt.tasks.packages
- - comma separated list of additional packages where tasks classes will be looked for
- when not found in the default package (that of PerfTask). If the same task class
- appears in more than one package, the package indicated first in this list will be used.
-
-
-
-
-
-
-For sample use of these properties see the *.alg files under conf.
-
-
-
-
Example input algorithm and the result benchmark report
-
-The following example is in conf/sample.alg:
-
-# --------------------------------------------------------
-#
-# Sample: what is the effect of doc size on indexing time?
-#
-# There are two parts in this test:
-# - PopulateShort adds 2N documents of length L
-# - PopulateLong adds N documents of length 2L
-# Which one would be faster?
-# The comparison is done twice.
-#
-# --------------------------------------------------------
-
-# -------------------------------------------------------------------------------------
-# multi val params are iterated by NewRound's, added to reports, start with column name.
-merge.factor=mrg:10:20
-max.buffered=buf:100:1000
-compound=true
-
-analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
-directory=FSDirectory
-
-doc.stored=true
-doc.tokenized=true
-doc.term.vector=false
-doc.add.log.step=500
-
-docs.dir=reuters-out
-
-doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
-
-query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
-
-# task at this depth or less would print when they start
-task.max.depth.log=2
-
-log.queries=false
-# -------------------------------------------------------------------------------------
-{
-
- { "PopulateShort"
- CreateIndex
- { AddDoc(4000) > : 20000
- Optimize
- CloseIndex
- >
-
- ResetSystemErase
-
- { "PopulateLong"
- CreateIndex
- { AddDoc(8000) > : 10000
- Optimize
- CloseIndex
- >
-
- ResetSystemErase
-
- NewRound
-
-} : 2
-
-RepSumByName
-RepSelectByPref Populate
-
-
-
-
-
-The command line for running this sample:
-
ant run-task -Dtask.alg=conf/sample.alg
-
-
-
-The output report from running this test contains the following:
-
-Operation round mrg buf runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem
-PopulateShort 0 10 100 1 20003 119.6 167.26 12,959,120 14,241,792
-PopulateLong - - 0 10 100 - - 1 - - 10003 - - - 74.3 - - 134.57 - 17,085,208 - 20,635,648
-PopulateShort 1 20 1000 1 20003 143.5 139.39 63,982,040 94,756,864
-PopulateLong - - 1 20 1000 - - 1 - - 10003 - - - 77.0 - - 129.92 - 87,309,608 - 100,831,232
-
-
-
-
-
Results record counting clarified
-
-Two columns in the results table indicate records counts: records-per-run and
-records-per-second. What does it mean?
-
-Almost every task gets 1 in this count just for being executed.
-Task sequences aggregate the counts of their child tasks,
-plus their own count of 1.
-So, a task sequence containing 5 other task sequences, each running a single
-other task 10 times, would have a count of 1 + 5 * (1 + 10) = 56.
-
-The traverse and retrieve tasks "count" more: a traverse task
-would add 1 for each traversed result (hit), and a retrieve task would
-additionally add 1 for each retrieved doc. So, regular Search would
-count 1, SearchTrav that traverses 10 hits would count 11, and a
-SearchTravRet task that retrieves (and traverses) 10, would count 21.
-
-Confusing? this might help: always examine the elapsedSec
column,
-and always compare "apples to apples", .i.e. it is interesting to check how the
-rec/s
changed for the same task (or sequence) between two
-different runs, but it is not very useful to know how the rec/s
-differs between Search
and SearchTrav
tasks. For
-the latter, elapsedSec
would bring more insight.
-
-
-
-
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java
new file mode 100644
index 00000000000..d9dfd44ff9b
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Sample performance test written programmatically - no algorithm file is needed here.
+ */
+package org.apache.lucene.benchmark.byTask.programmatic;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html
deleted file mode 100644
index 7221c425645..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/programmatic/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Sample performance test written programmatically - no algorithm file is needed here.
-
-
\ No newline at end of file
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java
new file mode 100644
index 00000000000..2bc0751b405
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Statistics maintained when running benchmark tasks.
+ */
+package org.apache.lucene.benchmark.byTask.stats;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html
deleted file mode 100644
index fb44623f5e5..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
- Statistics maintained when running benchmark tasks.
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java
new file mode 100644
index 00000000000..d06f4848a72
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Extendable benchmark tasks.
+ */
+package org.apache.lucene.benchmark.byTask.tasks;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html
deleted file mode 100644
index 9c17edc3c2c..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/package.html
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
-
-
-
-
-
-Extendable benchmark tasks.
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java
new file mode 100644
index 00000000000..4066a7a98af
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities used for the benchmark, and for the reports.
+ */
+package org.apache.lucene.benchmark.byTask.utils;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html
deleted file mode 100644
index 6a71c2fd39a..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Utilities used for the benchmark, and for the reports.
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java
new file mode 100644
index 00000000000..2df570b12ad
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package-info.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Lucene Benchmarking Package
+ *
+ * The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.
+ *
+ * ANT will download the corpus automatically, place it in a temp directory and then unpack it to the
+ * working.dir directory specified in the build. The temp directory and working directory can be safely
+ * removed after a run. However, the next time the task is run, it will need to download the files again.
+ *
+ * Classes implementing the Benchmarker interface should have a no-argument constructor if they are to be
+ * used with the Driver class. The Driver class is provided for convenience only. Feel free to implement
+ * your own main class for your benchmarker.
+ *
+ * The StandardBenchmarker is meant to be just that, a standard that runs out of the box with no
+ * configuration or changes needed. Other benchmarking classes may derive from it to provide alternate
+ * views or to take in command line options. When reporting benchmarking runs you should state any
+ * alterations you have made.
+ *
+ * To run the short version of the StandardBenchmarker, call "ant run-micro-standard". This should take
+ * a minute or so to complete and give you a preliminary idea of how your change affects the code.
+ *
+ * To run the long version of the StandardBenchmarker, call "ant run-standard". This takes considerably longer.
+ *
+ * The original code for these classes was donated by Andrzej Bialecki at http://issues.apache.org/jira/browse/LUCENE-675
+ * and has been updated by Grant Ingersoll to make some parts of the code reusable in other benchmarkers.
+ */
+package org.apache.lucene.benchmark;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
deleted file mode 100644
index 2daf1865fd4..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
-
-
- Lucene Benchmarking Package
-
-
-The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.
-
-
- ANT will
- download the corpus automatically, place it in a temp directory and then unpack it to the working.dir directory specified in the build.
- The temp directory
- and working directory can be safely removed after a run. However, the next time the task is run, it will need to download the files again.
-
- Classes implementing the Benchmarker interface should have a no-argument constructor if they are to be used with the Driver class. The Driver
- class is provided for convenience only. Feel free to implement your own main class for your benchmarker.
-
- The StandardBenchmarker is meant to be just that, a standard that runs out of the box with no configuration or changes needed.
- Other benchmarking classes may derive from it to provide alternate views or to take in command line options. When reporting benchmarking runs
- you should state any alterations you have made.
-
- To run the short version of the StandardBenchmarker, call "ant run-micro-standard". This should take a minute or so to complete and give you a preliminary idea of how your change affects the code
-
- To run the long version of the StandardBenchmarker, call "ant run-standard". This takes considerably longer.
-
- The original code for these classes was donated by Andrzej Bialecki at http://issues.apache.org/jira/browse/LUCENE-675 and has been updated by Grant Ingersoll to make some parts of the code reusable in other benchmarkers
-
-
-
-
\ No newline at end of file
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java
new file mode 100755
index 00000000000..de19b3c5bec
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package-info.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Search Quality Benchmarking.
+ *
+ * This package allows benchmarking the search quality of a Lucene application.
+ *
+ * In order to use this package you should provide:
+ *
+ * - An IndexSearcher.
+ * - {@link org.apache.lucene.benchmark.quality.QualityQuery Quality queries}.
+ * - {@link org.apache.lucene.benchmark.quality.Judge Judging object}.
+ * - {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport Reporting object}.
+ *
+ *
+ * For benchmarking TREC collections with TREC QRels, take a look at the
+ * {@link org.apache.lucene.benchmark.quality.trec trec package}.
+ *
+ * Here is sample code used to run the TREC 2006 queries 701-850 on the .Gov2 collection:
+ *
+ *
+ * File topicsFile = new File("topics-701-850.txt");
+ * File qrelsFile = new File("qrels-701-850.txt");
+ * IndexReader ir = DirectoryReader.open(directory);
+ * IndexSearcher searcher = new IndexSearcher(ir);
+ *
+ * int maxResults = 1000;
+ * String docNameField = "docname";
+ *
+ * PrintWriter logger = new PrintWriter(System.out,true);
+ *
+ * // use trec utilities to read trec topics into quality queries
+ * TrecTopicsReader qReader = new TrecTopicsReader();
+ * QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+ *
+ * // prepare judge, with trec utilities that read from a QRels file
+ * Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+ *
+ * // validate topics & judgments match each other
+ * judge.validateData(qqs, logger);
+ *
+ * // set the parsing of quality queries into Lucene queries.
+ * QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+ *
+ * // run the benchmark
+ * QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+ * SubmissionReport submitLog = null;
+ * QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+ *
+ * // print an average sum of the results
+ * QualityStats avg = QualityStats.average(stats);
+ * avg.log("SUMMARY",2,logger, " ");
+ *
+ *
+ *
+ * Some immediate ways to modify this program to your needs are:
+ *
+ * - To run on different formats of queries and judgements provide your own
+ * {@link org.apache.lucene.benchmark.quality.Judge Judge} and
+ * {@link org.apache.lucene.benchmark.quality.QualityQuery Quality queries}.
+ * - Create sophisticated Lucene queries by supplying a different
+ * {@link org.apache.lucene.benchmark.quality.QualityQueryParser Quality query parser}.
+ *
+ */
+package org.apache.lucene.benchmark.quality;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
deleted file mode 100755
index 10afab9c30f..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
-
-Search Quality Benchmarking.
-
-This package allows to benchmark search quality of a Lucene application.
-
-In order to use this package you should provide:
-
-
-For benchmarking TREC collections with TREC QRels, take a look at the
-trec package.
-
-Here is a sample code used to run the TREC 2006 queries 701-850 on the .Gov2 collection:
-
-
- File topicsFile = new File("topics-701-850.txt");
- File qrelsFile = new File("qrels-701-850.txt");
- IndexReader ir = DirectoryReader.open(directory):
- IndexSearcher searcher = new IndexSearcher(ir);
-
- int maxResults = 1000;
- String docNameField = "docname";
-
- PrintWriter logger = new PrintWriter(System.out,true);
-
- // use trec utilities to read trec topics into quality queries
- TrecTopicsReader qReader = new TrecTopicsReader();
- QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
-
- // prepare judge, with trec utilities that read from a QRels file
- Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
-
- // validate topics & judgments match each other
- judge.validateData(qqs, logger);
-
- // set the parsing of quality queries into Lucene queries.
- QualityQueryParser qqParser = new SimpleQQParser("title", "body");
-
- // run the benchmark
- QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
- SubmissionReport submitLog = null;
- QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
-
- // print an avarage sum of the results
- QualityStats avg = QualityStats.average(stats);
- avg.log("SUMMARY",2,logger, " ");
-
-
-
-Some immediate ways to modify this program to your needs are:
-
-
-
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java
new file mode 100755
index 00000000000..45ddec80a5c
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities for TREC-related quality benchmarking, feeding from TREC Topics and QRels inputs.
+ */
+package org.apache.lucene.benchmark.quality.trec;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
deleted file mode 100755
index dafccb7fc8a..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs.
-
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java
new file mode 100755
index 00000000000..56d721d4dca
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Miscellaneous utilities for search quality benchmarking: query parsing, submission reports.
+ */
+package org.apache.lucene.benchmark.quality.utils;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
deleted file mode 100755
index 7fde1a86c63..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-Miscellaneous utilities for search quality benchmarking: query parsing, submission reports.
-
-
-
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java
new file mode 100644
index 00000000000..956b84fa927
--- /dev/null
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Benchmark Utility functions.
+ */
+package org.apache.lucene.benchmark.utils;
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html
deleted file mode 100644
index 06cfbee86b4..00000000000
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/package.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-Benchmark Utility functions.
-
-