Code style and Javadoc nits.

2021-07-11 16:46:11 +02:00 · 2021-07-11 16:46:11 +02:00 · 693d560427
parent 509f50b0a5
commit 693d560427
2 changed files with 78 additions and 88 deletions
--- a/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
+++ b/commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistribution.java
@ -46,40 +46,46 @@ import org.apache.commons.math4.legacy.core.jdkmath.AccurateMath;

 /**
 * <p>Represents an <a href="http://en.wikipedia.org/wiki/Empirical_distribution_function">
- * empirical probability distribution</a> -- a probability distribution derived
+ * empirical probability distribution</a>: a probability distribution derived
 * from observed data without making any assumptions about the functional form
 * of the population distribution that the data come from.</p>
 *
- * <p>An <code>EmpiricalDistribution</code> maintains data structures, called
- * <i>distribution digests</i>, that describe empirical distributions and
- * support the following operations: <ul>
- * <li>loading the distribution from a file of observed data values</li>
- * <li>dividing the input data into "bin ranges" and reporting bin frequency
- *     counts (data for histogram)</li>
- * <li>reporting univariate statistics describing the full set of data values
- *     as well as the observations within each bin</li>
- * <li>generating random values from the distribution</li>
+ * <p>An {@code EmpiricalDistribution} maintains data structures called
+ * <i>distribution digests</i> that describe empirical distributions and
+ * support the following operations:
+ * <ul>
+ *  <li>loading the distribution from a file of observed data values</li>
+ *  <li>dividing the input data into "bin ranges" and reporting bin frequency
+ *      counts (data for histogram)</li>
+ *  <li>reporting univariate statistics describing the full set of data values
+ *      as well as the observations within each bin</li>
+ *  <li>generating random values from the distribution</li>
 * </ul>
- * Applications can use <code>EmpiricalDistribution</code> to build grouped
+ *
+ * Applications can use {@code EmpiricalDistribution} to build grouped
 * frequency histograms representing the input data or to generate random values
- * "like" those in the input file -- i.e., the values generated will follow the
+ * "like" those in the input file, i.e. the values generated will follow the
 * distribution of the values in the file.
 *
 * <p>The implementation uses what amounts to the
 * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
 * Variable Kernel Method</a> with Gaussian smoothing:<p>
 * <strong>Digesting the input file</strong>
- * <ol><li>Pass the file once to compute min and max.</li>
- * <li>Divide the range from min-max into <code>binCount</code> "bins."</li>
- * <li>Pass the data file again, computing bin counts and univariate
- *     statistics (mean, std dev.) for each of the bins </li>
- * <li>Divide the interval (0,1) into subintervals associated with the bins,
- *     with the length of a bin's subinterval proportional to its count.</li></ol>
- * <strong>Generating random values from the distribution</strong><ol>
- * <li>Generate a uniformly distributed value in (0,1) </li>
- * <li>Select the subinterval to which the value belongs.
- * <li>Generate a random Gaussian value with mean = mean of the associated
- *     bin and std dev = std dev of associated bin.</li></ol>
+ * <ol>
+ *  <li>Pass the file once to compute min and max.</li>
+ *  <li>Divide the range from min to max into {@code binCount} bins.</li>
+ *  <li>Pass the data file again, computing bin counts and univariate
+ *      statistics (mean and std dev.) for each bin.</li>
+ *  <li>Divide the interval (0,1) into subintervals associated with the bins,
+ *      with the length of a bin's subinterval proportional to its count.</li>
+ * </ol>
+ * <strong>Generating random values from the distribution</strong>
+ * <ol>
+ *  <li>Generate a uniformly distributed value in (0,1).</li>
+ *  <li>Select the subinterval to which the value belongs.</li>
+ *  <li>Generate a random Gaussian value with mean = mean of the associated
+ *      bin and std dev = std dev of associated bin.</li>
+ * </ol>
 *
 * <p>EmpiricalDistribution implements the {@link ContinuousDistribution} interface
 * as follows.  Given x within the range of values in the dataset, let B
@ -91,49 +97,38 @@ import org.apache.commons.math4.legacy.core.jdkmath.AccurateMath;
 * grouped frequency distribution at the bin endpoints and interpolates within
 * bins using within-bin kernels.</p>
 *
- *<strong>USAGE NOTES:</strong><ul>
- *<li>The <code>binCount</code> is set by default to 1000.  A good rule of thumb
- *    is to set the bin count to approximately the length of the input file divided
- *    by 10. </li>
- *<li>The input file <i>must</i> be a plain text file containing one valid numeric
- *    entry per line.</li>
+ * <strong>USAGE NOTES:</strong>
+ * <ul>
+ * <li>The {@code binCount} is set by default to 1000.  A good rule of thumb
+ *     is to set the bin count to approximately the length of the input file divided
+ *     by 10.</li>
+ * <li>The input file <i>must</i> be a plain text file containing one valid numeric
+ *     entry per line.</li>
 * </ul>
- *
 */
 public class EmpiricalDistribution extends AbstractRealDistribution
    implements ContinuousDistribution {
-
    /** Default bin count. */
    public static final int DEFAULT_BIN_COUNT = 1000;
-
    /** Character set for file input. */
    private static final String FILE_CHARSET = "US-ASCII";
-
    /** Serializable version identifier. */
    private static final long serialVersionUID = 5729073523949762654L;
-
-    /** List of SummaryStatistics objects characterizing the bins. */
+    /** Bins' characteristics. */
    private final List<SummaryStatistics> binStats;
-
    /** Sample statistics. */
    private SummaryStatistics sampleStats;
-
    /** Max loaded value. */
    private double max = Double.NEGATIVE_INFINITY;
-
    /** Min loaded value. */
    private double min = Double.POSITIVE_INFINITY;
-
    /** Grid size. */
    private double delta;
-
-    /** number of bins. */
+    /** Number of bins. */
    private final int binCount;
-
-    /** is the distribution loaded? */
+    /** Whether the distribution is loaded. */
    private boolean loaded;
-
-    /** upper bounds of subintervals in (0,1) "belonging" to the bins. */
+    /** Upper bounds of subintervals in (0,1) belonging to the bins. */
    private double[] upperBounds;

    /**
@ -247,11 +242,10 @@ public class EmpiricalDistribution extends AbstractRealDistribution
    }

    /**
-     * Provides methods for computing <code>sampleStats</code> and
-     * <code>beanStats</code> abstracting the source of data.
+     * Provides methods for computing {@code sampleStats} and
+     * {@code binStats} abstracting the source of data.
     */
-    private abstract class DataAdapter{
-
+    private abstract class DataAdapter {
        /**
         * Compute bin stats.
         *
@ -265,16 +259,14 @@ public class EmpiricalDistribution extends AbstractRealDistribution
         * @throws IOException if an error occurs computing sample stats
         */
        public abstract void computeStats() throws IOException;
-
    }

    /**
-     * <code>DataAdapter</code> for data provided through some input stream.
+     * {@code DataAdapter} for data provided through some input stream.
     */
-    private class StreamDataAdapter extends DataAdapter{
-
+    private class StreamDataAdapter extends DataAdapter {
        /** Input stream providing access to the data. */
-        private BufferedReader inputStream;
+        private final BufferedReader inputStream;

        /**
         * Create a StreamDataAdapter from a BufferedReader.
@ -282,7 +274,6 @@ public class EmpiricalDistribution extends AbstractRealDistribution
         * @param in BufferedReader input stream
         */
        StreamDataAdapter(BufferedReader in){
-            super();
            inputStream = in;
        }

@ -298,7 +289,6 @@ public class EmpiricalDistribution extends AbstractRealDistribution
            }

            inputStream.close();
-            inputStream = null;
        }

        /** {@inheritDoc} */
@ -312,15 +302,13 @@ public class EmpiricalDistribution extends AbstractRealDistribution
                sampleStats.addValue(val);
            }
            inputStream.close();
-            inputStream = null;
        }
    }

    /**
-     * <code>DataAdapter</code> for data provided as array of doubles.
+     * {@code DataAdapter} for data provided as array of doubles.
     */
    private class ArrayDataAdapter extends DataAdapter {
-
        /** Array of input  data values. */
        private final double[] inputArray;

@ -331,7 +319,6 @@ public class EmpiricalDistribution extends AbstractRealDistribution
         * @throws NullArgumentException if in is null
         */
        ArrayDataAdapter(double[] in) {
-            super();
            NullArgumentException.check(in);
            inputArray = in;
        }
@ -349,8 +336,7 @@ public class EmpiricalDistribution extends AbstractRealDistribution
        @Override
        public void computeBinStats() throws IOException {
            for (int i = 0; i < inputArray.length; i++) {
-                SummaryStatistics stats =
-                    binStats.get(findBin(inputArray[i]));
+                SummaryStatistics stats = binStats.get(findBin(inputArray[i]));
                stats.addValue(inputArray[i]);
            }
        }
@ -362,12 +348,11 @@ public class EmpiricalDistribution extends AbstractRealDistribution
     * @param da object providing access to the data
     * @throws IOException  if an IO error occurs
     */
-    private void fillBinStats(final DataAdapter da)
-        throws IOException {
+    private void fillBinStats(final DataAdapter da) throws IOException {
        // Set up grid
        min = sampleStats.getMin();
        max = sampleStats.getMax();
-        delta = (max - min)/binCount;
+        delta = (max - min) / binCount;

        // Initialize binStats ArrayList
        if (!binStats.isEmpty()) {
@ -375,7 +360,7 @@ public class EmpiricalDistribution extends AbstractRealDistribution
        }
        for (int i = 0; i < binCount; i++) {
            SummaryStatistics stats = new SummaryStatistics();
-            binStats.add(i,stats);
+            binStats.add(i, stats);
        }

        // Filling data in binStats Array
@ -383,13 +368,12 @@ public class EmpiricalDistribution extends AbstractRealDistribution

        // Assign upperBounds based on bin counts
        upperBounds = new double[binCount];
-        upperBounds[0] =
-        ((double) binStats.get(0).getN()) / (double) sampleStats.getN();
-        for (int i = 1; i < binCount-1; i++) {
-            upperBounds[i] = upperBounds[i-1] +
-            ((double) binStats.get(i).getN()) / (double) sampleStats.getN();
+        upperBounds[0] = binStats.get(0).getN() / (double) sampleStats.getN();
+        for (int i = 1; i < binCount - 1; i++) {
+            upperBounds[i] = upperBounds[i - 1] +
+                binStats.get(i).getN() / (double) sampleStats.getN();
        }
-        upperBounds[binCount-1] = 1.0d;
+        upperBounds[binCount - 1] = 1d;
    }

    /**
@ -399,9 +383,8 @@ public class EmpiricalDistribution extends AbstractRealDistribution
     * @return the index of the bin containing the value
     */
    private int findBin(double value) {
-        return AccurateMath.min(
-                AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
-                binCount - 1);
+        return AccurateMath.min(AccurateMath.max((int) AccurateMath.ceil((value - min) / delta) - 1, 0),
+                                binCount - 1);
    }

    /**
@ -490,7 +473,7 @@ public class EmpiricalDistribution extends AbstractRealDistribution
        return loaded;
    }

-    // Distribution methods ---------------------------
+    // Distribution methods.

    /**
     * {@inheritDoc}
@ -588,21 +571,22 @@ public class EmpiricalDistribution extends AbstractRealDistribution
     */
    @Override
    public double inverseCumulativeProbability(final double p) {
-        if (p < 0.0 || p > 1.0) {
+        if (p < 0 ||
+            p > 1) {
            throw new OutOfRangeException(p, 0, 1);
        }

-        if (p == 0.0) {
+        if (p == 0) {
            return getSupportLowerBound();
        }

-        if (p == 1.0) {
+        if (p == 1) {
            return getSupportUpperBound();
        }

        int i = 0;
        while (cumBinP(i) < p) {
-            i++;
+            ++i;
        }

        final ContinuousDistribution kernel = getKernel(binStats.get(i));
--- a/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
+++ b/commons-math-legacy/src/test/java/org/apache/commons/math4/legacy/distribution/EmpiricalDistributionTest.java
@ -667,8 +667,7 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
        }
    }

-    @Ignore
-    @Test
+    @Ignore@Test
    public void testMath1462() {
        final double[] data = {
            6464.0205, 6449.1328, 6489.4569, 6497.5533, 6251.6487,
@ -689,13 +688,20 @@ public final class EmpiricalDistributionTest extends RealDistributionAbstractTes
        final EmpiricalDistribution ed = new EmpiricalDistribution(data.length);
        ed.load(data);

-        final double p50 = ed.inverseCumulativeProbability(0.5);
-        final double p51 = ed.inverseCumulativeProbability(0.51111);
-        final double p49 = ed.inverseCumulativeProbability(0.49999);
+        double v;
+        double p;

-        Assert.assertTrue(p51 < 6350);
-        Assert.assertTrue(p49 < 6341);
-        Assert.assertTrue(p50 < 7000);
+        p = 0.49999;
+        v = ed.inverseCumulativeProbability(p);
+        Assert.assertTrue("p=" + p + " => v=" + v, v < 6341);
+
+        p = 0.5;
+        v = ed.inverseCumulativeProbability(p);
+        Assert.assertTrue("p=" + p + " => v=" + v, v < 7000);
+
+        p = 0.51111;
+        v = ed.inverseCumulativeProbability(p);
+        Assert.assertTrue("p=" + p + " => v=" + v, v < 6350);
    }

    /**