mirror of
https://github.com/apache/commons-math.git
synced 2025-02-06 10:09:26 +00:00
Merged ZipfDistribution and ZipfDistributionImpl (MATH-711).
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1206451 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d161d473bb
commit
dbcf7dd622
@ -17,30 +17,184 @@
|
||||
|
||||
package org.apache.commons.math.distribution;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.math.exception.NotStrictlyPositiveException;
|
||||
import org.apache.commons.math.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math.util.FastMath;
|
||||
|
||||
/**
|
||||
* The Zipf (or zeta) Distribution.
|
||||
* <p>
|
||||
* References:
|
||||
* <ul>
|
||||
* <li><a href="http://mathworld.wolfram.com/ZipfDistribution.html">Zipf
|
||||
* Distribution</a></li>
|
||||
* </ul>
|
||||
* </p>
|
||||
* Implementation of the Zipf distribution.
|
||||
*
|
||||
* @see <a href="http://mathworld.wolfram.com/ZipfDistribution.html">Zipf distribution (MathWorld)</a>
|
||||
* @version $Id$
|
||||
*/
|
||||
public interface ZipfDistribution extends IntegerDistribution {
|
||||
public class ZipfDistribution extends AbstractIntegerDistribution
|
||||
implements Serializable {
|
||||
/** Serializable version identifier. */
|
||||
private static final long serialVersionUID = -140627372283420404L;
|
||||
/** Number of elements. */
|
||||
private final int numberOfElements;
|
||||
/** Exponent parameter of the distribution. */
|
||||
private final double exponent;
|
||||
|
||||
/**
|
||||
* Create a new Zipf distribution with the given number of elements and
|
||||
* exponent.
|
||||
*
|
||||
* @param numberOfElements Number of elements.
|
||||
* @param exponent Exponent.
|
||||
* @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
|
||||
* or {@code exponent <= 0}.
|
||||
*/
|
||||
public ZipfDistribution(final int numberOfElements, final double exponent)
|
||||
throws NotStrictlyPositiveException {
|
||||
if (numberOfElements <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
|
||||
numberOfElements);
|
||||
}
|
||||
if (exponent <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
|
||||
exponent);
|
||||
}
|
||||
|
||||
this.numberOfElements = numberOfElements;
|
||||
this.exponent = exponent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements (e.g. corpus size) for the distribution.
|
||||
*
|
||||
* @return the number of elements
|
||||
*/
|
||||
int getNumberOfElements();
|
||||
public int getNumberOfElements() {
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the exponent characterising the distribution.
|
||||
* Get the exponent characterizing the distribution.
|
||||
*
|
||||
* @return the exponent
|
||||
*/
|
||||
double getExponent();
|
||||
public double getExponent() {
|
||||
return exponent;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public double probability(final int x) {
|
||||
if (x <= 0 || x > numberOfElements) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public double cumulativeProbability(final int x) {
|
||||
if (x <= 0) {
|
||||
return 0.0;
|
||||
} else if (x >= numberOfElements) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
protected int getDomainLowerBound(final double p) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
protected int getDomainUpperBound(final double p) {
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the Nth generalized harmonic number. See
|
||||
* <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
|
||||
* Series</a>.
|
||||
*
|
||||
* @param n Term in the series to calculate (must be at least 1)
|
||||
* @param m Exponent (special case {@code m = 1} is the harmonic series).
|
||||
* @return the n<sup>th</sup> generalized harmonic number.
|
||||
*/
|
||||
private double generalizedHarmonic(final int n, final double m) {
|
||||
double value = 0;
|
||||
for (int k = n; k > 0; --k) {
|
||||
value += 1.0 / FastMath.pow(k, m);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* The lower bound of the support is always 1 no matter the parameters.
|
||||
*
|
||||
* @return lower bound of the support (always 1)
|
||||
*/
|
||||
@Override
|
||||
public int getSupportLowerBound() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* The upper bound of the support is the number of elements.
|
||||
*
|
||||
* @return upper bound of the support
|
||||
*/
|
||||
@Override
|
||||
public int getSupportUpperBound() {
|
||||
return getNumberOfElements();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* For number of elements {@code N} and exponent {@code s}, the mean is
|
||||
* {@code Hs1 / Hs}, where
|
||||
* <ul>
|
||||
* <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li>
|
||||
* <li>{@code Hs = generalizedHarmonic(N, s)}.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Override
|
||||
protected double calculateNumericalMean() {
|
||||
final int N = getNumberOfElements();
|
||||
final double s = getExponent();
|
||||
|
||||
final double Hs1 = generalizedHarmonic(N, s - 1);
|
||||
final double Hs = generalizedHarmonic(N, s);
|
||||
|
||||
return Hs1 / Hs;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* For number of elements {@code N} and exponent {@code s}, the variance is
|
||||
* {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where
|
||||
* <ul>
|
||||
* <li>{@code Hs2 = generalizedHarmonic(N, s - 2)},</li>
|
||||
* <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li>
|
||||
* <li>{@code Hs = generalizedHarmonic(N, s)}.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Override
|
||||
protected double calculateNumericalVariance() {
|
||||
final int N = getNumberOfElements();
|
||||
final double s = getExponent();
|
||||
|
||||
final double Hs2 = generalizedHarmonic(N, s - 2);
|
||||
final double Hs1 = generalizedHarmonic(N, s - 1);
|
||||
final double Hs = generalizedHarmonic(N, s);
|
||||
|
||||
return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
|
||||
}
|
||||
}
|
||||
|
@ -1,222 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.math.distribution;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.commons.math.exception.NotStrictlyPositiveException;
|
||||
import org.apache.commons.math.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math.util.FastMath;
|
||||
|
||||
/**
|
||||
* Implementation for the {@link ZipfDistribution}.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ZipfDistributionImpl extends AbstractIntegerDistribution
|
||||
implements ZipfDistribution, Serializable {
|
||||
/** Serializable version identifier. */
|
||||
private static final long serialVersionUID = -140627372283420404L;
|
||||
/** Number of elements. */
|
||||
private final int numberOfElements;
|
||||
/** Exponent parameter of the distribution. */
|
||||
private final double exponent;
|
||||
|
||||
/**
|
||||
* Create a new Zipf distribution with the given number of elements and
|
||||
* exponent.
|
||||
*
|
||||
* @param numberOfElements Number of elements.
|
||||
* @param exponent Exponent.
|
||||
* @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
|
||||
* or {@code exponent <= 0}.
|
||||
*/
|
||||
public ZipfDistributionImpl(final int numberOfElements,
|
||||
final double exponent) {
|
||||
if (numberOfElements <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION,
|
||||
numberOfElements);
|
||||
}
|
||||
if (exponent <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT,
|
||||
exponent);
|
||||
}
|
||||
|
||||
this.numberOfElements = numberOfElements;
|
||||
this.exponent = exponent;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public int getNumberOfElements() {
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public double getExponent() {
|
||||
return exponent;
|
||||
}
|
||||
|
||||
/**
|
||||
* The probability mass function {@code P(X = x)} for a Zipf distribution.
|
||||
*
|
||||
* @param x Value at which the probability mass function is evaluated.
|
||||
* @return the value of the probability mass function at {@code x}.
|
||||
*/
|
||||
public double probability(final int x) {
|
||||
if (x <= 0 || x > numberOfElements) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
|
||||
}
|
||||
|
||||
/**
|
||||
* The cumulative distribution function {@code P(X <= x)} for a
|
||||
* Zipf distribution.
|
||||
*
|
||||
* @param x Value at which the CDF is evaluated.
|
||||
* @return Zipf distribution function evaluated at {@code x}.
|
||||
*/
|
||||
@Override
|
||||
public double cumulativeProbability(final int x) {
|
||||
if (x <= 0) {
|
||||
return 0.0;
|
||||
} else if (x >= numberOfElements) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Access the domain value lower bound, based on {@code p}, used to
|
||||
* bracket a CDF root.
|
||||
*
|
||||
* @param p Desired probability for the critical value.
|
||||
* @return the domain value lower bound, i.e. {@code P(X < 'lower bound') < p}.
|
||||
*/
|
||||
@Override
|
||||
protected int getDomainLowerBound(final double p) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Access the domain value upper bound, based on {@code p}, used to
|
||||
* bracket a CDF root.
|
||||
*
|
||||
* @param p Desired probability for the critical value
|
||||
* @return the domain value upper bound, i.e. {@code P(X < 'upper bound') > p}.
|
||||
*/
|
||||
@Override
|
||||
protected int getDomainUpperBound(final double p) {
|
||||
return numberOfElements;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the Nth generalized harmonic number. See
|
||||
* <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
|
||||
* Series</a>.
|
||||
*
|
||||
* @param n Term in the series to calculate (must be larger than 1)
|
||||
* @param m Exponent (special case {@code m = 1} is the harmonic series).
|
||||
* @return the n<sup>th</sup> generalized harmonic number.
|
||||
*/
|
||||
private double generalizedHarmonic(final int n, final double m) {
|
||||
double value = 0;
|
||||
for (int k = n; k > 0; --k) {
|
||||
value += 1.0 / FastMath.pow(k, m);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* The lower bound of the support is always 1 no matter the parameters.
|
||||
*
|
||||
* @return lower bound of the support (always 1)
|
||||
*/
|
||||
@Override
|
||||
public int getSupportLowerBound() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* The upper bound of the support is the number of elements
|
||||
*
|
||||
* @return upper bound of the support
|
||||
*/
|
||||
@Override
|
||||
public int getSupportUpperBound() {
|
||||
return getNumberOfElements();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* For number of elements N and exponent s, the mean is
|
||||
* <code>Hs1 / Hs</code> where
|
||||
* <ul>
|
||||
* <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
|
||||
* <li><code>Hs = generalizedHarmonic(N, s)</code></li>
|
||||
* </ul>
|
||||
*
|
||||
* @return {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
protected double calculateNumericalMean() {
|
||||
final int N = getNumberOfElements();
|
||||
final double s = getExponent();
|
||||
|
||||
final double Hs1 = generalizedHarmonic(N, s - 1);
|
||||
final double Hs = generalizedHarmonic(N, s);
|
||||
|
||||
return Hs1 / Hs;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* For number of elements N and exponent s, the variance is
|
||||
* <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
|
||||
* <ul>
|
||||
* <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
|
||||
* <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
|
||||
* <li><code>Hs = generalizedHarmonic(N, s)</code></li>
|
||||
* </ul>
|
||||
*
|
||||
* @return {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
protected double calculateNumericalVariance() {
|
||||
final int N = getNumberOfElements();
|
||||
final double s = getExponent();
|
||||
|
||||
final double Hs2 = generalizedHarmonic(N, s - 2);
|
||||
final double Hs1 = generalizedHarmonic(N, s - 1);
|
||||
final double Hs = generalizedHarmonic(N, s);
|
||||
|
||||
return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
|
||||
}
|
||||
}
|
@ -35,7 +35,7 @@ import org.apache.commons.math.distribution.IntegerDistribution;
|
||||
import org.apache.commons.math.distribution.PascalDistribution;
|
||||
import org.apache.commons.math.distribution.TDistribution;
|
||||
import org.apache.commons.math.distribution.WeibullDistribution;
|
||||
import org.apache.commons.math.distribution.ZipfDistributionImpl;
|
||||
import org.apache.commons.math.distribution.ZipfDistribution;
|
||||
import org.apache.commons.math.exception.MathInternalError;
|
||||
import org.apache.commons.math.exception.NotStrictlyPositiveException;
|
||||
import org.apache.commons.math.exception.NumberIsTooLargeException;
|
||||
@ -811,7 +811,7 @@ public class RandomDataImpl implements RandomData, Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a random value from the {@link ZipfDistributionImpl Zipf Distribution}.
|
||||
* Generates a random value from the {@link ZipfDistribution Zipf Distribution}.
|
||||
* This implementation uses {@link #nextInversionDeviate(IntegerDistribution) inversion}
|
||||
* to generate random values.
|
||||
*
|
||||
@ -821,7 +821,7 @@ public class RandomDataImpl implements RandomData, Serializable {
|
||||
* @since 2.2
|
||||
*/
|
||||
public int nextZipf(int numberOfElements, double exponent) {
|
||||
return nextInversionDeviate(new ZipfDistributionImpl(numberOfElements, exponent));
|
||||
return nextInversionDeviate(new ZipfDistribution(numberOfElements, exponent));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -34,12 +34,12 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
|
||||
|
||||
@Test(expected=NotStrictlyPositiveException.class)
|
||||
public void testPreconditions1() {
|
||||
new ZipfDistributionImpl(0, 1);
|
||||
new ZipfDistribution(0, 1);
|
||||
}
|
||||
|
||||
@Test(expected=NotStrictlyPositiveException.class)
|
||||
public void testPreconditions2() {
|
||||
new ZipfDistributionImpl(1, 0);
|
||||
new ZipfDistribution(1, 0);
|
||||
}
|
||||
|
||||
//-------------- Implementations for abstract methods -----------------------
|
||||
@ -47,7 +47,7 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
|
||||
/** Creates the default discrete distribution instance to use in tests. */
|
||||
@Override
|
||||
public IntegerDistribution makeDistribution() {
|
||||
return new ZipfDistributionImpl(10, 1);
|
||||
return new ZipfDistribution(10, 1);
|
||||
}
|
||||
|
||||
/** Creates the default probability density test input values */
|
||||
@ -93,9 +93,9 @@ public class ZipfDistributionTest extends IntegerDistributionAbstractTest {
|
||||
public void testMoments() {
|
||||
final double tol = 1e-9;
|
||||
ZipfDistribution dist;
|
||||
|
||||
dist = new ZipfDistributionImpl(2, 0.5);
|
||||
|
||||
dist = new ZipfDistribution(2, 0.5);
|
||||
Assert.assertEquals(dist.getNumericalMean(), FastMath.sqrt(2), tol);
|
||||
Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol);
|
||||
Assert.assertEquals(dist.getNumericalVariance(), 0.24264068711928521, tol);
|
||||
}
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ import org.apache.commons.math.distribution.PoissonDistribution;
|
||||
import org.apache.commons.math.distribution.PoissonDistribution;
|
||||
import org.apache.commons.math.distribution.TDistribution;
|
||||
import org.apache.commons.math.distribution.WeibullDistribution;
|
||||
import org.apache.commons.math.distribution.ZipfDistributionImpl;
|
||||
import org.apache.commons.math.distribution.ZipfDistribution;
|
||||
import org.apache.commons.math.distribution.ZipfDistributionTest;
|
||||
import org.apache.commons.math.stat.Frequency;
|
||||
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
|
||||
@ -1040,7 +1040,7 @@ public class RandomDataTest {
|
||||
double[] densityValues = testInstance.makeDensityTestValues();
|
||||
int sampleSize = 1000;
|
||||
int length = TestUtils.eliminateZeroMassPoints(densityPoints, densityValues);
|
||||
ZipfDistributionImpl distribution = (ZipfDistributionImpl) testInstance.makeDistribution();
|
||||
ZipfDistribution distribution = (ZipfDistribution) testInstance.makeDistribution();
|
||||
double[] expectedCounts = new double[length];
|
||||
long[] observedCounts = new long[length];
|
||||
for (int i = 0; i < length; i++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user