From dbcf7dd6223cb91b6f15a644a2562c38b451240f Mon Sep 17 00:00:00 2001
From: Sebastien Brisard
Date: Sat, 26 Nov 2011 14:58:07 +0000
Subject: [PATCH] Merged ZipfDistribution and ZipfDistributionImpl (MATH-711).
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1206451 13f79535-47bb-0310-9956-ffa450edef68
---
.../math/distribution/ZipfDistribution.java | 178 +++++++++++++-
.../distribution/ZipfDistributionImpl.java | 222 ------------------
.../commons/math/random/RandomDataImpl.java | 6 +-
.../distribution/ZipfDistributionTest.java | 12 +-
.../commons/math/random/RandomDataTest.java | 4 +-
5 files changed, 177 insertions(+), 245 deletions(-)
delete mode 100644 src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java
diff --git a/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java b/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java
index fca6fff76..c8e24aea9 100644
--- a/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java
+++ b/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java
@@ -17,30 +17,184 @@
package org.apache.commons.math.distribution;
+import java.io.Serializable;
+
+import org.apache.commons.math.exception.NotStrictlyPositiveException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.util.FastMath;
+
/**
- * The Zipf (or zeta) Distribution.
- *
- * References:
- *
- *
+ * Implementation of the Zipf distribution.
*
+ * @see <a href="http://mathworld.wolfram.com/ZipfDistribution.html">Zipf distribution (MathWorld)</a>
* @version $Id$
*/
-public interface ZipfDistribution extends IntegerDistribution {
+public class ZipfDistribution extends AbstractIntegerDistribution
+ implements Serializable {
+ /** Serializable version identifier. */
+ private static final long serialVersionUID = -140627372283420404L;
+ /** Number of elements. */
+ private final int numberOfElements;
+ /** Exponent parameter of the distribution. */
+ private final double exponent;
+
+ /**
+ * Create a new Zipf distribution with the given number of elements and
+ * exponent.
+ *
+ * @param numberOfElements Number of elements.
+ * @param exponent Exponent.
+ * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
+ * or {@code exponent <= 0}.
+ */
+ public ZipfDistribution(final int numberOfElements, final double exponent)
+ throws NotStrictlyPositiveException {
+ if (numberOfElements <= 0) {
+ throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
+ numberOfElements);
+ }
+ if (exponent <= 0) {
+ throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
+ exponent);
+ }
+
+ this.numberOfElements = numberOfElements;
+ this.exponent = exponent;
+ }
+
/**
* Get the number of elements (e.g. corpus size) for the distribution.
*
* @return the number of elements
*/
- int getNumberOfElements();
+ public int getNumberOfElements() {
+ return numberOfElements;
+ }
/**
- * Get the exponent characterising the distribution.
+ * Get the exponent characterizing the distribution.
*
* @return the exponent
*/
- double getExponent();
+ public double getExponent() {
+ return exponent;
+ }
+
+ /** {@inheritDoc} */
+ public double probability(final int x) {
+ if (x <= 0 || x > numberOfElements) {
+ return 0.0;
+ }
+
+ return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public double cumulativeProbability(final int x) {
+ if (x <= 0) {
+ return 0.0;
+ } else if (x >= numberOfElements) {
+ return 1.0;
+ }
+
+ return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected int getDomainLowerBound(final double p) {
+ return 0;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected int getDomainUpperBound(final double p) {
+ return numberOfElements;
+ }
+
+ /**
+ * Calculates the Nth generalized harmonic number. See
+ * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
+ * Series</a>.
+ *
+ * @param n Term in the series to calculate (expected to be at least 1).
+ * @param m Exponent (special case {@code m = 1} is the harmonic series).
+ * @return the nth generalized harmonic number.
+ */
+ private double generalizedHarmonic(final int n, final double m) {
+ double value = 0;
+ for (int k = n; k > 0; --k) {
+ value += 1.0 / FastMath.pow(k, m);
+ }
+ return value;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * The lower bound of the support is always 1 no matter the parameters.
+ *
+ * @return lower bound of the support (always 1)
+ */
+ @Override
+ public int getSupportLowerBound() {
+ return 1;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * The upper bound of the support is the number of elements.
+ *
+ * @return upper bound of the support
+ */
+ @Override
+ public int getSupportUpperBound() {
+ return getNumberOfElements();
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * For number of elements {@code N} and exponent {@code s}, the mean is
+ * {@code Hs1 / Hs}, where
+ * <ul>
+ * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li>
+ * <li>{@code Hs = generalizedHarmonic(N, s)}.</li>
+ * </ul>
+ */
+ @Override
+ protected double calculateNumericalMean() {
+ final int N = getNumberOfElements();
+ final double s = getExponent();
+
+ final double Hs1 = generalizedHarmonic(N, s - 1);
+ final double Hs = generalizedHarmonic(N, s);
+
+ return Hs1 / Hs;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * For number of elements {@code N} and exponent {@code s}, the variance is
+ * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where
+ * <ul>
+ * <li>{@code Hs2 = generalizedHarmonic(N, s - 2)},</li>
+ * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li>
+ * <li>{@code Hs = generalizedHarmonic(N, s)}.</li>
+ * </ul>
+ */
+ @Override
+ protected double calculateNumericalVariance() {
+ final int N = getNumberOfElements();
+ final double s = getExponent();
+
+ final double Hs2 = generalizedHarmonic(N, s - 2);
+ final double Hs1 = generalizedHarmonic(N, s - 1);
+ final double Hs = generalizedHarmonic(N, s);
+
+ return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
+ }
}
diff --git a/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java b/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java
deleted file mode 100644
index b875dcb82..000000000
--- a/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.math.distribution;
-
-import java.io.Serializable;
-
-import org.apache.commons.math.exception.NotStrictlyPositiveException;
-import org.apache.commons.math.exception.util.LocalizedFormats;
-import org.apache.commons.math.util.FastMath;
-
-/**
- * Implementation for the {@link ZipfDistribution}.
- *
- * @version $Id$
- */
-public class ZipfDistributionImpl extends AbstractIntegerDistribution
- implements ZipfDistribution, Serializable {
- /** Serializable version identifier. */
- private static final long serialVersionUID = -140627372283420404L;
- /** Number of elements. */
- private final int numberOfElements;
- /** Exponent parameter of the distribution. */
- private final double exponent;
-
- /**
- * Create a new Zipf distribution with the given number of elements and
- * exponent.
- *
- * @param numberOfElements Number of elements.
- * @param exponent Exponent.
- * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
- * or {@code exponent <= 0}.
- */
- public ZipfDistributionImpl(final int numberOfElements,
- final double exponent) {
- if (numberOfElements <= 0) {
- throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
- numberOfElements);
- }
- if (exponent <= 0) {
- throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
- exponent);
- }
-
- this.numberOfElements = numberOfElements;
- this.exponent = exponent;
- }
-
- /**
- * {@inheritDoc}
- */
- public int getNumberOfElements() {
- return numberOfElements;
- }
-
- /**
- * {@inheritDoc}
- */
- public double getExponent() {
- return exponent;
- }
-
- /**
- * The probability mass function {@code P(X = x)} for a Zipf distribution.
- *
- * @param x Value at which the probability density function is evaluated.
- * @return the value of the probability mass function at {@code x}.
- */
- public double probability(final int x) {
- if (x <= 0 || x > numberOfElements) {
- return 0.0;
- }
-
- return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
- }
-
- /**
- * The probability distribution function {@code P(X <= x)} for a
- * Zipf distribution.
- *
- * @param x Value at which the PDF is evaluated.
- * @return Zipf distribution function evaluated at {@code x}.
- */
- @Override
- public double cumulativeProbability(final int x) {
- if (x <= 0) {
- return 0.0;
- } else if (x >= numberOfElements) {
- return 1.0;
- }
-
- return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
- }
-
- /**
- * Access the domain value lower bound, based on {@code p}, used to
- * bracket a PDF root.
- *
- * @param p Desired probability for the critical value.
- * @return the domain value lower bound, i.e. {@code P(X < 'lower bound') < p}.
- */
- @Override
- protected int getDomainLowerBound(final double p) {
- return 0;
- }
-
- /**
- * Access the domain value upper bound, based on {@code p}, used to
- * bracket a PDF root.
- *
- * @param p Desired probability for the critical value
- * @return the domain value upper bound, i.e. {@code P(X < 'upper bound') > p}.
- */
- @Override
- protected int getDomainUpperBound(final double p) {
- return numberOfElements;
- }
-
- /**
- * Calculates the Nth generalized harmonic number. See
- * Harmonic
- * Series.
- *
- * @param n Term in the series to calculate (must be larger than 1)
- * @param m Exponent (special case {@code m = 1} is the harmonic series).
- * @return the nth generalized harmonic number.
- */
- private double generalizedHarmonic(final int n, final double m) {
- double value = 0;
- for (int k = n; k > 0; --k) {
- value += 1.0 / FastMath.pow(k, m);
- }
- return value;
- }
-
- /**
- * {@inheritDoc}
- *
- * The lower bound of the support is always 1 no matter the parameters.
- *
- * @return lower bound of the support (always 1)
- */
- @Override
- public int getSupportLowerBound() {
- return 1;
- }
-
- /**
- * {@inheritDoc}
- *
- * The upper bound of the support is the number of elements
- *
- * @return upper bound of the support
- */
- @Override
- public int getSupportUpperBound() {
- return getNumberOfElements();
- }
-
- /**
- * {@inheritDoc}
- *
- * For number of elements N and exponent s, the mean is
- * Hs1 / Hs where
- *
- * Hs1 = generalizedHarmonic(N, s - 1)
- * Hs = generalizedHarmonic(N, s)
- *
- *
- * @return {@inheritDoc}
- */
- @Override
- protected double calculateNumericalMean() {
- final int N = getNumberOfElements();
- final double s = getExponent();
-
- final double Hs1 = generalizedHarmonic(N, s - 1);
- final double Hs = generalizedHarmonic(N, s);
-
- return Hs1 / Hs;
- }
-
- /**
- * {@inheritDoc}
- *
- * For number of elements N and exponent s, the mean is
- * (Hs2 / Hs) - (Hs1^2 / Hs^2) where
- *
- * Hs2 = generalizedHarmonic(N, s - 2)
- * Hs1 = generalizedHarmonic(N, s - 1)
- * Hs = generalizedHarmonic(N, s)
- *
- *
- * @return {@inheritDoc}
- */
- @Override
- protected double calculateNumericalVariance() {
- final int N = getNumberOfElements();
- final double s = getExponent();
-
- final double Hs2 = generalizedHarmonic(N, s - 2);
- final double Hs1 = generalizedHarmonic(N, s - 1);
- final double Hs = generalizedHarmonic(N, s);
-
- return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
- }
-}
diff --git a/src/main/java/org/apache/commons/math/random/RandomDataImpl.java b/src/main/java/org/apache/commons/math/random/RandomDataImpl.java
index 81284a088..2e6073fff 100644
--- a/src/main/java/org/apache/commons/math/random/RandomDataImpl.java
+++ b/src/main/java/org/apache/commons/math/random/RandomDataImpl.java
@@ -35,7 +35,7 @@ import org.apache.commons.math.distribution.IntegerDistribution;
import org.apache.commons.math.distribution.PascalDistribution;
import org.apache.commons.math.distribution.TDistribution;
import org.apache.commons.math.distribution.WeibullDistribution;
-import org.apache.commons.math.distribution.ZipfDistributionImpl;
+import org.apache.commons.math.distribution.ZipfDistribution;
import org.apache.commons.math.exception.MathInternalError;
import org.apache.commons.math.exception.NotStrictlyPositiveException;
import org.apache.commons.math.exception.NumberIsTooLargeException;
@@ -811,7 +811,7 @@ public class RandomDataImpl implements RandomData, Serializable {
}
/**
- * Generates a random value from the {@link ZipfDistributionImpl Zipf Distribution}.
+ * Generates a random value from the {@link ZipfDistribution Zipf Distribution}.
* This implementation uses {@link #nextInversionDeviate(IntegerDistribution) inversion}
* to generate random values.
*
@@ -821,7 +821,7 @@ public class RandomDataImpl implements RandomData, Serializable {
* @since 2.2
*/
public int nextZipf(int numberOfElements, double exponent) {
- return nextInversionDeviate(new ZipfDistributionImpl(numberOfElements, exponent));
+ return nextInversionDeviate(new ZipfDistribution(numberOfElements, exponent));
}
/**
diff --git a/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java b/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java
index f6dd430ad..f6c963e81 100644
--- a/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java
+++ b/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java
@@ -34,12 +34,12 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
@Test(expected=NotStrictlyPositiveException.class)
public void testPreconditions1() {
- new ZipfDistributionImpl(0, 1);
+ new ZipfDistribution(0, 1);
}
@Test(expected=NotStrictlyPositiveException.class)
public void testPreconditions2() {
- new ZipfDistributionImpl(1, 0);
+ new ZipfDistribution(1, 0);
}
//-------------- Implementations for abstract methods -----------------------
@@ -47,7 +47,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
/** Creates the default discrete distribution instance to use in tests. */
@Override
public IntegerDistribution makeDistribution() {
- return new ZipfDistributionImpl(10, 1);
+ return new ZipfDistribution(10, 1);
}
/** Creates the default probability density test input values */
@@ -93,9 +93,9 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
public void testMoments() {
final double tol = 1e-9;
ZipfDistribution dist;
-
- dist = new ZipfDistributionImpl(2, 0.5);
+
+ dist = new ZipfDistribution(2, 0.5);
Assert.assertEquals(dist.getNumericalMean(), FastMath.sqrt(2), tol);
- Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol);
+ Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol);
}
}
diff --git a/src/test/java/org/apache/commons/math/random/RandomDataTest.java b/src/test/java/org/apache/commons/math/random/RandomDataTest.java
index 1e1be72a1..69e585c76 100644
--- a/src/test/java/org/apache/commons/math/random/RandomDataTest.java
+++ b/src/test/java/org/apache/commons/math/random/RandomDataTest.java
@@ -41,7 +41,7 @@ import org.apache.commons.math.distribution.PoissonDistribution;
import org.apache.commons.math.distribution.PoissonDistribution;
import org.apache.commons.math.distribution.TDistribution;
import org.apache.commons.math.distribution.WeibullDistribution;
-import org.apache.commons.math.distribution.ZipfDistributionImpl;
+import org.apache.commons.math.distribution.ZipfDistribution;
import org.apache.commons.math.distribution.ZipfDistributionTest;
import org.apache.commons.math.stat.Frequency;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
@@ -1040,7 +1040,7 @@ public class RandomDataTest {
double[] densityValues = testInstance.makeDensityTestValues();
int sampleSize = 1000;
int length = TestUtils.eliminateZeroMassPoints(densityPoints, densityValues);
- ZipfDistributionImpl distribution = (ZipfDistributionImpl) testInstance.makeDistribution();
+ ZipfDistribution distribution = (ZipfDistribution) testInstance.makeDistribution();
double[] expectedCounts = new double[length];
long[] observedCounts = new long[length];
for (int i = 0; i < length; i++) {