From dbcf7dd6223cb91b6f15a644a2562c38b451240f Mon Sep 17 00:00:00 2001 From: Sebastien Brisard Date: Sat, 26 Nov 2011 14:58:07 +0000 Subject: [PATCH] Merged ZipfDistribution and ZipfDistributionImpl (MATH-711). git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1206451 13f79535-47bb-0310-9956-ffa450edef68 --- .../math/distribution/ZipfDistribution.java | 178 +++++++++++++- .../distribution/ZipfDistributionImpl.java | 222 ------------------ .../commons/math/random/RandomDataImpl.java | 6 +- .../distribution/ZipfDistributionTest.java | 12 +- .../commons/math/random/RandomDataTest.java | 4 +- 5 files changed, 177 insertions(+), 245 deletions(-) delete mode 100644 src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java diff --git a/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java b/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java index fca6fff76..c8e24aea9 100644 --- a/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java +++ b/src/main/java/org/apache/commons/math/distribution/ZipfDistribution.java @@ -17,30 +17,184 @@ package org.apache.commons.math.distribution; +import java.io.Serializable; + +import org.apache.commons.math.exception.NotStrictlyPositiveException; +import org.apache.commons.math.exception.util.LocalizedFormats; +import org.apache.commons.math.util.FastMath; + /** - * The Zipf (or zeta) Distribution. - *

- * References: - *

- *

+ * Implementation of the Zipf distribution. * + * @see Zipf distribution (MathWorld) * @version $Id$ */ -public interface ZipfDistribution extends IntegerDistribution { +public class ZipfDistribution extends AbstractIntegerDistribution + implements Serializable { + /** Serializable version identifier. */ + private static final long serialVersionUID = -140627372283420404L; + /** Number of elements. */ + private final int numberOfElements; + /** Exponent parameter of the distribution. */ + private final double exponent; + + /** + * Create a new Zipf distribution with the given number of elements and + * exponent. + * + * @param numberOfElements Number of elements. + * @param exponent Exponent. + * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} + * or {@code exponent <= 0}. + */ + public ZipfDistribution(final int numberOfElements, final double exponent) + throws NotStrictlyPositiveException { + if (numberOfElements <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION, + numberOfElements); + } + if (exponent <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT, + exponent); + } + + this.numberOfElements = numberOfElements; + this.exponent = exponent; + } + /** * Get the number of elements (e.g. corpus size) for the distribution. * * @return the number of elements */ - int getNumberOfElements(); + public int getNumberOfElements() { + return numberOfElements; + } /** - * Get the exponent characterising the distribution. + * Get the exponent characterizing the distribution. 
* * @return the exponent */ - double getExponent(); + public double getExponent() { + return exponent; + } + + /** {@inheritDoc} */ + public double probability(final int x) { + if (x <= 0 || x > numberOfElements) { + return 0.0; + } + + return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(final int x) { + if (x <= 0) { + return 0.0; + } else if (x >= numberOfElements) { + return 1.0; + } + + return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); + } + + /** {@inheritDoc} */ + @Override + protected int getDomainLowerBound(final double p) { + return 0; + } + + /** {@inheritDoc} */ + @Override + protected int getDomainUpperBound(final double p) { + return numberOfElements; + } + + /** + * Calculates the Nth generalized harmonic number. See + * Harmonic + * Series. + * + * @param n Term in the series to calculate (must be larger than 1) + * @param m Exponent (special case {@code m = 1} is the harmonic series). + * @return the nth generalized harmonic number. + */ + private double generalizedHarmonic(final int n, final double m) { + double value = 0; + for (int k = n; k > 0; --k) { + value += 1.0 / FastMath.pow(k, m); + } + return value; + } + + /** + * {@inheritDoc} + * + * The lower bound of the support is always 1 no matter the parameters. + * + * @return lower bound of the support (always 1) + */ + @Override + public int getSupportLowerBound() { + return 1; + } + + /** + * {@inheritDoc} + * + * The upper bound of the support is the number of elements. 
+ * + * @return upper bound of the support + */ + @Override + public int getSupportUpperBound() { + return getNumberOfElements(); + } + + /** + * {@inheritDoc} + * + * For number of elements {@code N} and exponent {@code s}, the mean is + * {@code Hs1 / Hs}, where + * + */ + @Override + protected double calculateNumericalMean() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return Hs1 / Hs; + } + + /** + * {@inheritDoc} + * + * For number of elements {@code N} and exponent {@code s}, the variance is + * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where + * + */ + @Override + protected double calculateNumericalVariance() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs2 = generalizedHarmonic(N, s - 2); + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); + } } diff --git a/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java b/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java deleted file mode 100644 index b875dcb82..000000000 --- a/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.math.distribution; - -import java.io.Serializable; - -import org.apache.commons.math.exception.NotStrictlyPositiveException; -import org.apache.commons.math.exception.util.LocalizedFormats; -import org.apache.commons.math.util.FastMath; - -/** - * Implementation for the {@link ZipfDistribution}. - * - * @version $Id$ - */ -public class ZipfDistributionImpl extends AbstractIntegerDistribution - implements ZipfDistribution, Serializable { - /** Serializable version identifier. */ - private static final long serialVersionUID = -140627372283420404L; - /** Number of elements. */ - private final int numberOfElements; - /** Exponent parameter of the distribution. */ - private final double exponent; - - /** - * Create a new Zipf distribution with the given number of elements and - * exponent. - * - * @param numberOfElements Number of elements. - * @param exponent Exponent. - * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} - * or {@code exponent <= 0}. 
- */ - public ZipfDistributionImpl(final int numberOfElements, - final double exponent) { - if (numberOfElements <= 0) { - throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION, - numberOfElements); - } - if (exponent <= 0) { - throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT, - exponent); - } - - this.numberOfElements = numberOfElements; - this.exponent = exponent; - } - - /** - * {@inheritDoc} - */ - public int getNumberOfElements() { - return numberOfElements; - } - - /** - * {@inheritDoc} - */ - public double getExponent() { - return exponent; - } - - /** - * The probability mass function {@code P(X = x)} for a Zipf distribution. - * - * @param x Value at which the probability density function is evaluated. - * @return the value of the probability mass function at {@code x}. - */ - public double probability(final int x) { - if (x <= 0 || x > numberOfElements) { - return 0.0; - } - - return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); - } - - /** - * The probability distribution function {@code P(X <= x)} for a - * Zipf distribution. - * - * @param x Value at which the PDF is evaluated. - * @return Zipf distribution function evaluated at {@code x}. - */ - @Override - public double cumulativeProbability(final int x) { - if (x <= 0) { - return 0.0; - } else if (x >= numberOfElements) { - return 1.0; - } - - return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); - } - - /** - * Access the domain value lower bound, based on {@code p}, used to - * bracket a PDF root. - * - * @param p Desired probability for the critical value. - * @return the domain value lower bound, i.e. {@code P(X < 'lower bound') < p}. - */ - @Override - protected int getDomainLowerBound(final double p) { - return 0; - } - - /** - * Access the domain value upper bound, based on {@code p}, used to - * bracket a PDF root. 
- * - * @param p Desired probability for the critical value - * @return the domain value upper bound, i.e. {@code P(X < 'upper bound') > p}. - */ - @Override - protected int getDomainUpperBound(final double p) { - return numberOfElements; - } - - /** - * Calculates the Nth generalized harmonic number. See - * Harmonic - * Series. - * - * @param n Term in the series to calculate (must be larger than 1) - * @param m Exponent (special case {@code m = 1} is the harmonic series). - * @return the nth generalized harmonic number. - */ - private double generalizedHarmonic(final int n, final double m) { - double value = 0; - for (int k = n; k > 0; --k) { - value += 1.0 / FastMath.pow(k, m); - } - return value; - } - - /** - * {@inheritDoc} - * - * The lower bound of the support is always 1 no matter the parameters. - * - * @return lower bound of the support (always 1) - */ - @Override - public int getSupportLowerBound() { - return 1; - } - - /** - * {@inheritDoc} - * - * The upper bound of the support is the number of elements - * - * @return upper bound of the support - */ - @Override - public int getSupportUpperBound() { - return getNumberOfElements(); - } - - /** - * {@inheritDoc} - * - * For number of elements N and exponent s, the mean is - * Hs1 / Hs where - * - * - * @return {@inheritDoc} - */ - @Override - protected double calculateNumericalMean() { - final int N = getNumberOfElements(); - final double s = getExponent(); - - final double Hs1 = generalizedHarmonic(N, s - 1); - final double Hs = generalizedHarmonic(N, s); - - return Hs1 / Hs; - } - - /** - * {@inheritDoc} - * - * For number of elements N and exponent s, the mean is - * (Hs2 / Hs) - (Hs1^2 / Hs^2) where - * - * - * @return {@inheritDoc} - */ - @Override - protected double calculateNumericalVariance() { - final int N = getNumberOfElements(); - final double s = getExponent(); - - final double Hs2 = generalizedHarmonic(N, s - 2); - final double Hs1 = generalizedHarmonic(N, s - 1); - final double Hs = 
generalizedHarmonic(N, s); - - return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); - } -} diff --git a/src/main/java/org/apache/commons/math/random/RandomDataImpl.java b/src/main/java/org/apache/commons/math/random/RandomDataImpl.java index 81284a088..2e6073fff 100644 --- a/src/main/java/org/apache/commons/math/random/RandomDataImpl.java +++ b/src/main/java/org/apache/commons/math/random/RandomDataImpl.java @@ -35,7 +35,7 @@ import org.apache.commons.math.distribution.IntegerDistribution; import org.apache.commons.math.distribution.PascalDistribution; import org.apache.commons.math.distribution.TDistribution; import org.apache.commons.math.distribution.WeibullDistribution; -import org.apache.commons.math.distribution.ZipfDistributionImpl; +import org.apache.commons.math.distribution.ZipfDistribution; import org.apache.commons.math.exception.MathInternalError; import org.apache.commons.math.exception.NotStrictlyPositiveException; import org.apache.commons.math.exception.NumberIsTooLargeException; @@ -811,7 +811,7 @@ public class RandomDataImpl implements RandomData, Serializable { } /** - * Generates a random value from the {@link ZipfDistributionImpl Zipf Distribution}. + * Generates a random value from the {@link ZipfDistribution Zipf Distribution}. * This implementation uses {@link #nextInversionDeviate(IntegerDistribution) inversion} * to generate random values. 
* @@ -821,7 +821,7 @@ public class RandomDataImpl implements RandomData, Serializable { * @since 2.2 */ public int nextZipf(int numberOfElements, double exponent) { - return nextInversionDeviate(new ZipfDistributionImpl(numberOfElements, exponent)); + return nextInversionDeviate(new ZipfDistribution(numberOfElements, exponent)); } /** diff --git a/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java b/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java index f6dd430ad..f6c963e81 100644 --- a/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java +++ b/src/test/java/org/apache/commons/math/distribution/ZipfDistributionTest.java @@ -34,12 +34,12 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest { @Test(expected=NotStrictlyPositiveException.class) public void testPreconditions1() { - new ZipfDistributionImpl(0, 1); + new ZipfDistribution(0, 1); } @Test(expected=NotStrictlyPositiveException.class) public void testPreconditions2() { - new ZipfDistributionImpl(1, 0); + new ZipfDistribution(1, 0); } //-------------- Implementations for abstract methods ----------------------- @@ -47,7 +47,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest { /** Creates the default discrete distribution instance to use in tests. 
*/ @Override public IntegerDistribution makeDistribution() { - return new ZipfDistributionImpl(10, 1); + return new ZipfDistribution(10, 1); } /** Creates the default probability density test input values */ @@ -93,9 +93,9 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest { public void testMoments() { final double tol = 1e-9; ZipfDistribution dist; - - dist = new ZipfDistributionImpl(2, 0.5); + + dist = new ZipfDistribution(2, 0.5); Assert.assertEquals(dist.getNumericalMean(), FastMath.sqrt(2), tol); - Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol); + Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol); } } diff --git a/src/test/java/org/apache/commons/math/random/RandomDataTest.java b/src/test/java/org/apache/commons/math/random/RandomDataTest.java index 1e1be72a1..69e585c76 100644 --- a/src/test/java/org/apache/commons/math/random/RandomDataTest.java +++ b/src/test/java/org/apache/commons/math/random/RandomDataTest.java @@ -41,7 +41,7 @@ import org.apache.commons.math.distribution.PoissonDistribution; import org.apache.commons.math.distribution.PoissonDistribution; import org.apache.commons.math.distribution.TDistribution; import org.apache.commons.math.distribution.WeibullDistribution; -import org.apache.commons.math.distribution.ZipfDistributionImpl; +import org.apache.commons.math.distribution.ZipfDistribution; import org.apache.commons.math.distribution.ZipfDistributionTest; import org.apache.commons.math.stat.Frequency; import org.apache.commons.math.stat.descriptive.SummaryStatistics; @@ -1040,7 +1040,7 @@ public class RandomDataTest { double[] densityValues = testInstance.makeDensityTestValues(); int sampleSize = 1000; int length = TestUtils.eliminateZeroMassPoints(densityPoints, densityValues); - ZipfDistributionImpl distribution = (ZipfDistributionImpl) testInstance.makeDistribution(); + ZipfDistribution distribution = (ZipfDistribution) testInstance.makeDistribution(); 
double[] expectedCounts = new double[length]; long[] observedCounts = new long[length]; for (int i = 0; i < length; i++) {