diff --git a/src/experimental/R/README.txt b/src/experimental/R/README.txt new file mode 100644 index 000000000..a22e1f1d8 --- /dev/null +++ b/src/experimental/R/README.txt @@ -0,0 +1,167 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ + +INTRODUCTION + +The purpose of the R programs included in this directory is to validate +the target values used in jakarta commons math unit tests. Success running the +R and commons-math tests on a platform (OS and R version) means that R and +commons-math give results for the test cases that are close in value. The +tests include configurable tolerance levels; but care must be taken in changing +these, since in most cases the pre-set tolerance is close to the number of +decimal digits used in expressing the expected values (both here and in the +corresponding commons-math unit tests). + +Of course it is always possible that both R and commons-math give incorrect +values for test cases, so these tests should not be interpreted as definitive +in any absolute sense. The value of developing and running the tests is really +to generate questions (and answers!) when the two systems give different +results. + +Contributions of additional test cases (both R and Junit code) or just +R programs to validate commons-math tests that are not covered here would be +greatly appreciated. + +SETUP + +0) Download and install R. You can get R here +http://www.r-project.org/ +Follow the install instructions and make sure that you can launch R from this +(i.e., either explitly add R to your OS path or let the install package do it +for you). + +1) Launch R from this directory and type +> source("testAll") +to an R prompt. This should produce output to the console similar to this: + +Binomial test cases +Density test n = 10, p = 0.7...........................................SUCCEEDED +Distribution test n = 10, p = 0.7......................................SUCCEEDED +Inverse Distribution test n = 10, p = 0.7..............................SUCCEEDED +Density test n = 5, p = 0..............................................SUCCEEDED +Distribution test n = 5, p = 0.........................................SUCCEEDED +Density test n = 5, p = 1..............................................SUCCEEDED +Distribution test n = 5, p = 1.........................................SUCCEEDED +-------------------------------------------------------------------------------- +Normal test cases +Distribution test mu = 2.1, sigma = 1.4................................SUCCEEDED +Distribution test mu = 2.1, sigma = 1.4................................SUCCEEDED +Distribution test mu = 0, sigma = 1....................................SUCCEEDED +Distribution test mu = 0, sigma = 0.1..................................SUCCEEDED +-------------------------------------------------------------------------------- +... + + + +WORKING WITH THE TESTS + +The R distribution comes with online manuals that you can view by launching +a browser instance and then entering + +> help.start() + +at an R prompt. Poking about in the test case files and the online docs should +bring you up to speed fairly quickly. Here are some basic things to get +you started. I should note at this point that I by no means an expert R +programmer, so some things may not be implemented in the the nicest way. +Comments / suggestions for improvement are welcome! + +All of the test cases use some basic functions and global constants (screen +width and success / failure strings) defined in "testFunctions." The +R "source" function is used to "import" these functions into each of the test +programs. The "testAll" program pulls together and executes all of the +individual test programs. You can execute any one of them by just entering + +> source(). + +The "assertEquals" function in the testFunctions file mimics the similarly +named function used by Junit: + +assertEquals <- function(expected, observed, tol, message) { + if(any(abs(expected - observed) > tol)) { + cat("FAILURE: ",message,"\n") + cat("EXPECTED: ",expected,"\n") + cat("OBSERVED: ",observed,"\n") + return(0) + } else { + return(1) + } +} + +The and arguments can be scalar values, vectors or +matrices. If the arguments are vectors or matrices, corresponding entries +are compared. + +The standard pattern used throughout the tests looks like this (from +binomialTestCases): + +Start by defining a "verification function" -- in this example a function to +verify computation of binomial probabilities. The argument is a vector +of integer values to feed into the density function, is a vector of +the computed probabilies from the commons-math Junit tests, and

are +parameters of the distribution and is the error tolerance of the test. +The function computes the probabilities using R and compares the values that +R produces with those in the vector. + +verifyDensity <- function(points, expected, n, p, tol) { + rDensityValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDensityValues[i] <- dbinom(point, n, p, log = FALSE) + } + output <- c("Density test n = ", n, ", p = ", p) + if (assertEquals(expected,rDensityValues,tol,"Density Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +The displayPadded function just displays its first and second arguments with +enough dots in between to make the whole string WIDTH characters long. It is +defined in testFunctions. + +Then call this function with different parameters corresponding to the different +Junit test cases: + +size <- 10.0 +probability <- 0.70 + +densityPoints <- c(-1,0,1,2,3,4,5,6,7,8,9,10,11) +densityValues <- c(0, 0.0000, 0.0001, 0.0014, 0.0090, 0.0368, 0.1029, + 0.2001, 0.2668, 0.2335, 0.1211, 0.0282, 0) +... +verifyDensity(densityPoints, densityValues, size, probability, tol) + +If the values computed by R match the target values in densityValues, this will +produce one line of output to the console: + +Density test n = 10, p = 0.7...........................................SUCCEEDED + +If you modify the value of tol set at the top of binomialTestCases to make the +test more sensitive than the number of digits specified in the densityValues +vector, it will fail, producing the following output, showing the failure and +the expected and observed values: + +FAILURE: Density Values +EXPECTED: 0 0 1e-04 0.0014 0.009 0.0368 0.1029 0.2001 0.2668 0.2335 0.1211 / + 0.0282 0 +OBSERVED: 0 5.9049e-06 0.000137781 0.0014467005 0.009001692 0.036756909 / +0.1029193452 0.200120949 0.266827932 0.2334744405 0.121060821 0.0282475249 0 +Density test n = 10, p = 0.7..............................................FAILED + + diff --git a/src/experimental/R/binomialTestCases b/src/experimental/R/binomialTestCases new file mode 100644 index 000000000..3027356a1 --- /dev/null +++ b/src/experimental/R/binomialTestCases @@ -0,0 +1,126 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate Binomial distribution tests in +# org.apache.commons.math.distribution.BinomialDistributionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +# dbinom(x, size, prob, log = FALSE) <- density +# pbinom(q, size, prob, lower.tail = TRUE, log.p = FALSE) <- distribution +# qbinom(p, size, prob, lower.tail = TRUE, log.p = FALSE) <- quantiles +#------------------------------------------------------------------------------ +tol <- 1E-4 # error tolerance for tests +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions + +# function to verify density computations + +verifyDensity <- function(points, expected, n, p, tol) { + rDensityValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDensityValues[i] <- dbinom(point, n, p, log = FALSE) + } + output <- c("Density test n = ", n, ", p = ", p) + if (assertEquals(expected,rDensityValues,tol,"Density Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# function to verify distribution computations + +verifyDistribution <- function(points, expected, n, p, tol) { + rDistValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDistValues[i] <- pbinom(point, n, p, log = FALSE) + } + output <- c("Distribution test n = ", n, ", p = ", p) + if (assertEquals(expected,rDistValues,tol,"Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +#-------------------------------------------------------------------------- +cat("Binomial test cases\n") + +size <- 10.0 +probability <- 0.70 + +densityPoints <- c(-1,0,1,2,3,4,5,6,7,8,9,10,11) +densityValues <- c(0, 0.0000, 0.0001, 0.0014, 0.0090, 0.0368, 0.1029, + 0.2001, 0.2668, 0.2335, 0.1211, 0.0282, 0) +distributionValues <- c(0, 0.0000, 0.0001, 0.0016, 0.0106, 0.0473, + 0.1503, 0.3504, 0.6172, 0.8507, 0.9718, 1, 1) +inverseCumPoints <- c( 0.001, 0.010, 0.025, 0.050, 0.100, 0.999, + 0.990, 0.975, 0.950, 0.900) +inverseCumValues <- c(1, 2, 3, 4, 4, 9, 9, 9, 8, 8) + +verifyDensity(densityPoints,densityValues,size,probability,tol) +verifyDistribution(densityPoints, distributionValues, size, probability, tol) + +i <- 0 +rInverseCumValues <- rep(0,length(inverseCumPoints)) +for (point in inverseCumPoints) { + i <- i + 1 + rInverseCumValues[i] <- qbinom(point, size, probability, log = FALSE) +} + +output <- c("Inverse Distribution test n = ", size, ", p = ", probability) +# R defines quantiles from the right, need to subtract one +if (assertEquals(inverseCumValues, rInverseCumValues-1, tol, + "Inverse Dist Values")) { + displayPadded(output, SUCCEEDED, 80) +} else { + displayPadded(output, FAILED, 80) +} + +# Degenerate cases + +size <- 5 +probability <- 0.0 + +densityPoints <- c(-1, 0, 1, 10, 11) +densityValues <- c(0, 1, 0, 0, 0) +distributionPoints <- c(-1, 0, 1, 5, 10) +distributionValues <- c(0, 1, 1, 1, 1) + +verifyDensity(densityPoints,densityValues,size,probability,tol) +verifyDistribution(distributionPoints,distributionValues,size,probability,tol) + +size <- 5 +probability <- 1.0 + +densityPoints <- c(-1, 0, 1, 2, 5, 10) +densityValues <- c(0, 0, 0, 0, 1, 0) +distributionPoints <- c(-1, 0, 1, 2, 5, 10) +distributionValues <- c(0, 0, 0, 0, 1, 1) + +verifyDensity(densityPoints,densityValues,size,probability,tol) +verifyDistribution(distributionPoints,distributionValues,size,probability,tol) + +displayDashes(WIDTH) diff --git a/src/experimental/R/chiSquareTestCases b/src/experimental/R/chiSquareTestCases new file mode 100644 index 000000000..313598c82 --- /dev/null +++ b/src/experimental/R/chiSquareTestCases @@ -0,0 +1,99 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate ChiSquare tests in +# org.apache.commons.math.stat.inference.ChiSquareTestTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +#chisq.test(x, y = NULL, correct = TRUE, +# p = rep(1/length(x), length(x)), +# simulate.p.value = FALSE, B = 2000) +#------------------------------------------------------------------------------ +tol <- 1E-9 # error tolerance for tests +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions + +verifyTable <- function(counts, expectedP, expectedStat, tol, desc) { + results <- chisq.test(counts) + if (assertEquals(expectedP, results$p.value, tol, "p-value")) { + displayPadded(c(desc," p-value test"), SUCCEEDED, WIDTH) + } else { + displayPadded(c(desc, " p-value test"), FAILED, WIDTH) + } + if (assertEquals(expectedStat, results$statistic, tol, + "ChiSquare Statistic")) { + displayPadded(c(desc, " chi-square statistic test"), SUCCEEDED, WIDTH) + } else { + displayPadded(c(desc, " chi-square statistic test"), FAILED, WIDTH) + } +} + +verifyHomogeneity <- function(obs, exp, expectedP, expectedStat, + tol, desc) { + chi <- sum((obs - exp)^2/exp) + p <- 1 - pchisq(sum((obs - exp)^2/exp), length(obs) - 1) + if (assertEquals(expectedP, p, tol, "p-value")) { + displayPadded(c(desc, " p-value test"), SUCCEEDED, WIDTH) + } else { + displayPadded(c(desc, " p-value test"), FAILED, WIDTH) + } + if (assertEquals(expectedStat, chi, tol, + "ChiSquare Statistic")) { + displayPadded(c(desc, " chi-square statistic test"), SUCCEEDED, WIDTH) + } else { + displayPadded(c(desc, " chi-square statistic test"), FAILED, WIDTH) + } +} + +cat("ChiSquareTest test cases\n") + +observed <- c(10, 9, 11) +expected <- c(10, 10, 10) +verifyHomogeneity(observed, expected, 0.904837418036, 0.2, tol, + "testChiSquare1") + +observed <- c(500, 623, 72, 70, 31) +expected <- c(485, 541, 82, 61, 37) +verifyHomogeneity(observed, expected, 0.002512096, 16.4131070362, tol, + "testChiSquare2") + +observed <- c(2372383, 584222, 257170, 17750155, 7903832, 489265, + 209628, 393899) +expected <- c(3389119.5, 649136.6, 285745.4, 25357364.76, 11291189.78, + 543628.0, 232921.0, 437665.75) +verifyHomogeneity(observed, expected, 0, 3624883.342907764, tol, + "testChiSquareLargeTestStatistic") + +counts <- matrix(c(40, 22, 43, 91, 21, 28, 60, 10, 22), nc = 3); +verifyTable(counts, 0.000144751460134, 22.709027688, tol, + "testChiSquareIndependence1") + +counts <- matrix(c(10, 15, 30, 40, 60, 90), nc = 3); +verifyTable(counts, 0.918987499852, 0.168965517241, tol, + "testChiSquareIndependence2") + +counts <- matrix(c(40, 0, 4, 91, 1, 2, 60, 2, 0), nc = 3); +verifyTable(counts, 0.0462835770603, 9.67444662263, tol, + "testChiSquareZeroCount") + +displayDashes(WIDTH) + + diff --git a/src/experimental/R/exponentialTestCases b/src/experimental/R/exponentialTestCases new file mode 100644 index 000000000..36c2e0d97 --- /dev/null +++ b/src/experimental/R/exponentialTestCases @@ -0,0 +1,69 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate exponential distribution tests in +# org.apache.commons.math.distribution.ExponentialDistributionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +# pexp(q, rate = 1, lower.tail = TRUE, log.p = FALSE) <- distribution +# qexp(p, rate = 1, lower.tail = TRUE, log.p = FALSE) <- quantiles +#------------------------------------------------------------------------------ +tol <- 1E-7 + +# Function definitions + +source("testFunctions") # utility test functions + +# function to verify distribution computations + +verifyDistribution <- function(points, expected, mean, tol) { + rDistValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDistValues[i] <- pexp(point, 1/mean) + } + output <- c("Distribution test mean = ", mean) + if (assertEquals(expected, rDistValues, tol, "Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +#-------------------------------------------------------------------------- +cat("Exponential test cases\n") + +mean <- 5 + +distributionValues <- c(0, 0, 0.001, 0.01, 0.025, 0.05, 0.1, 0.999, + 0.990, 0.975, 0.950, 0.900) +distributionPoints <- c(-2, 0, 0.005002502, 0.05025168, 0.1265890, 0.2564665, 0.5268026, + 34.53878, 23.02585, 18.44440, 14.97866, 11.51293) + +verifyDistribution(distributionPoints, distributionValues, mean, tol) + +output <- "Probability test P(.25 < X < .75)" +if (assertEquals(0.0905214, pexp(.75, 1/mean) - pexp(.25, 1/mean), tol, "Probability value")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +displayDashes(WIDTH) diff --git a/src/experimental/R/hypergeometricTestCases b/src/experimental/R/hypergeometricTestCases new file mode 100644 index 000000000..e3d6c811c --- /dev/null +++ b/src/experimental/R/hypergeometricTestCases @@ -0,0 +1,135 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate Hypergeometric distribution tests in +# org.apache.commons.math.distribution.HypergeometricDistributionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +# dhyper(x, m, n, k, log = FALSE) <- density +# phyper(q, m, n, k, lower.tail = TRUE, log.p = FALSE) <- distribution +# qhyper(p, m, n, k, lower.tail = TRUE, log.p = FALSE) <- quantiles +#------------------------------------------------------------------------------ +tol <- 1E-6 # error tolerance for tests +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions + +# function to verify density computations + +verifyDensity <- function(points, expected, good, bad, selected, tol) { + rDensityValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDensityValues[i] <- dhyper(point, good, bad, selected) + } + output <- c("Density test good = ", good, ", bad = ", bad, + ", selected = ",selected) + if (assertEquals(expected,rDensityValues,tol,"Density Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# function to verify distribution computations + +verifyDistribution <- function(points, expected, good, bad, selected, tol) { + rDistValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDistValues[i] <- phyper(point, good, bad, selected) + } + output <- c("Distribution test good = ", good, ", bad = ", + bad, ", selected = ",selected) + if (assertEquals(expected,rDistValues,tol,"Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +#-------------------------------------------------------------------------- +cat("Hypergeometric test cases\n") + +good <- 5 +bad <- 5 +selected <- 5 + +densityPoints <- c(-1, 0, 1, 2, 3, 4, 5, 10) +densityValues <- c(0, 0.003968, 0.099206, 0.396825, 0.396825, 0.099206, + 0.003968, 0) +distributionValues <- c(0, .003968, .103175, .50000, .896825, .996032, + 1.00000, 1) +#Eliminate p=1 case because it will mess up adjustement below +inverseCumPoints <- c(0, 0.001, 0.010, 0.025, 0.050, 0.100, 0.999, + 0.990, 0.975, 0.950, 0.900) +inverseCumValues <- c(-1, -1, 0, 0, 0, 0, 4, 3, 3, 3, 3) + +verifyDensity(densityPoints, densityValues, good, bad, selected, tol) +verifyDistribution(densityPoints, distributionValues, good, bad, selected, tol) + +i <- 0 +rInverseCumValues <- rep(0,length(inverseCumPoints)) +for (point in inverseCumPoints) { + i <- i + 1 + rInverseCumValues[i] <- qhyper(point, good, bad, selected) +} + +output <- c("Inverse Distribution test good = ", good, ", bad = ", bad, + ", selected = ", selected) +# R defines quantiles from the right, need to subtract one +if (assertEquals(inverseCumValues, rInverseCumValues-1, tol, + "Inverse Dist Values")) { + displayPadded(output, SUCCEEDED, 80) +} else { + displayPadded(output, FAILED, 80) +} + +# Degenerate cases +good <- 5 +bad <- 0 +selected <- 3 +densityPoints <- c(-1, 0, 1, 3, 10) +densityValues <- c(0, 0, 0, 1, 0) +distributionValues <- c(0, 0, 0, 1, 1) +verifyDensity(densityPoints, densityValues, good, bad, selected, tol) +verifyDistribution(densityPoints, distributionValues, good, bad, selected, tol) + +good <- 0 +bad <- 5 +selected <- 3 +densityPoints <- c(-1, 0, 1, 3, 10) +densityValues <- c(0, 1, 0, 0, 0) +distributionValues <- c(0, 1, 1, 1, 1) +verifyDensity(densityPoints, densityValues, good, bad, selected, tol) +verifyDistribution(densityPoints, distributionValues, good, bad, selected, tol) + +good <- 3 +bad <- 2 +selected <- 5 +densityPoints <- c(-1, 0, 1, 3, 10) +densityValues <- c(0, 0, 0, 1, 0) +distributionValues <- c(0, 0, 0, 1, 1) +verifyDensity(densityPoints, densityValues, good, bad, selected, tol) +verifyDistribution(densityPoints, distributionValues, good, bad, selected, tol) + +displayDashes(WIDTH) diff --git a/src/experimental/R/normalTestCases b/src/experimental/R/normalTestCases new file mode 100644 index 000000000..4de80fe05 --- /dev/null +++ b/src/experimental/R/normalTestCases @@ -0,0 +1,84 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate Normal distribution tests in +# org.apache.commons.math.distribution.NormalDistributionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +# pnorm(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) <-- distribution +#----------------------------------------------------------------------------- +tol <- 1E-7 + +# Function definitions + +source("testFunctions") # utility test functions + +# function to verify distribution computations + +verifyDistribution <- function(points, expected, mu, sigma, tol) { + rDistValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDistValues[i] <- pnorm(point, mu, sigma, log = FALSE) + } + output <- c("Distribution test mu = ",mu,", sigma = ", sigma) + if (assertEquals(expected, rDistValues, tol, "Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +#-------------------------------------------------------------------------- +cat("Normal test cases\n") + +mu <- 2.1 +sigma <- 1.4 + +distributionValues <- c(0.001, 0.01, 0.025, 0.05, 0.1, 0.999, + 0.990, 0.975, 0.950, 0.900) +distributionPoints <- c(-2.226325, -1.156887, -0.6439496, -0.2027951, + 0.3058278, 6.426325, 5.356887, 4.84395, 4.402795, 3.894172) + +verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol) + +distributionValues <- c(0.02275013, 0.1586553, 0.5, 0.8413447, + 0.9772499, 0.9986501, 0.9999683, 0.9999997) +distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma, + mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma, + mu + 5 * sigma) + +verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol) + +mu <- 0 +sigma <- 1 +distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma, + mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma, + mu + 5 * sigma) +verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol) + +mu <- 0 +sigma <- 0.1 +distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma, + mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma, + mu + 5 * sigma) +verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol) + +displayDashes(WIDTH) diff --git a/src/experimental/R/poissonTestCases b/src/experimental/R/poissonTestCases new file mode 100644 index 000000000..8397d71f3 --- /dev/null +++ b/src/experimental/R/poissonTestCases @@ -0,0 +1,115 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate Poisson distribution tests in +# org.apache.commons.math.distribution.PoissonDistributionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# R functions used +# dpois(x, lambda, log = FALSE) <-- density +# ppois(q, lambda, lower.tail = TRUE, log.p = FALSE) <-- distribution +# pnorm(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) <-- normal dist. +#------------------------------------------------------------------------------ +tol <- 1E-10 +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions + +# function to verify density computations + +verifyDensity <- function(points, expected, lambda, tol) { + rDensityValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDensityValues[i] <- dpois(point, lambda, log = FALSE) + } + output <- c("Density test lambda = ", lambda) + if (assertEquals(expected, rDensityValues, tol, "Density Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# function to verify distribution computations + +verifyDistribution <- function(points, expected, lambda, tol) { + rDistValues <- rep(0, length(points)) + i <- 0 + for (point in points) { + i <- i + 1 + rDistValues[i] <- ppois(point, lambda, log = FALSE) + } + output <- c("Distribution test lambda = ", lambda) + if (assertEquals(expected, rDistValues, tol, "Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# function to verify normal approximation + +verifyNormalApproximation <- function(expected, lambda, lower, upper, tol) { + rValue <- pnorm(upper, mean=lambda, sd=sqrt(lambda), lower.tail = TRUE, + log.p = FALSE) - + pnorm(lower, mean=lambda, sd=sqrt(lambda), lower.tail = TRUE, + log.p = FALSE) + output <- c("Normal approx. test lambda = ", lambda, " upper = ", + upper, " lower = ", lower) + if (assertEquals(expected, rValue, tol, "Distribution Values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +cat("Poisson distribution test cases\n") + +# stock tests + +lambda <- 4.0 +densityPoints <- c(-1,0,1,2,3,4,5,10,20) +densityValues <- c(0, 0.0183156388887, 0.073262555555, 0.14652511111, + 0.195366814813, 0.195366814813, 0.156293451851, + 0.00529247667642, 8.27746364655e-09) +verifyDensity(densityPoints, densityValues, lambda, tol) + + +distributionPoints <- c(-1, 0, 1, 2, 3, 4, 5, 10, 20) +distributionValues <- c(0, 0.0183156388887, 0.0915781944437, 0.238103305554, + 0.433470120367, 0.62883693518, 0.78513038703, + 0.99716023388, 0.999999998077) +verifyDistribution(distributionPoints, distributionValues, lambda, tol) + +# normal approximation tests + +lambda <- 100 +verifyNormalApproximation(0.706281887248, lambda, 89.5, 110.5, tol) + +lambda <- 10000 +verifyNormalApproximation(0.820070051552, lambda, 9899.5, 10200.5, tol) + +displayDashes(WIDTH) + + + + + diff --git a/src/experimental/R/regressionTestCases b/src/experimental/R/regressionTestCases new file mode 100644 index 000000000..eafb83322 --- /dev/null +++ b/src/experimental/R/regressionTestCases @@ -0,0 +1,158 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#----------------------------------------------------------------------- +# R source file to validate Binomial distribution tests in +# org.apache.commons.math.stat.regression.SimpleRegressionTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("") +# +# Output will be written to a file named "regTestResults" +# in the directory from which R was launched +# +#------------------------------------------------------------------------------ +tol <- 1E-8 +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions +#------------------------------------------------------------------------------ +# infData example + +cat("Regresssion test cases\n") + +x <- c(15.6, 26.8,37.8,36.4,35.5,18.6,15.3,7.9,0.0) +y <- c(5.2, 6.1, 8.7, 8.5, 8.8, 4.9, 4.5, 2.5, 1.1) +model<-lm(y~x) +coef <- coefficients(summary(model)) +intercept <- coef[1, 1] +interceptStd <- coef[1, 2] +slope <- coef[2, 1] +slopeStd <- coef[2, 2] +significance <- coef[2, 4] + +output <- "InfData std error test" +if (assertEquals(0.011448491, slopeStd, tol, "Slope Standard Error") && + assertEquals(0.286036932, interceptStd, tol, "Intercept Standard Error")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +output <- "InfData significance test" +if (assertEquals(4.596e-07, significance, tol, "Significance")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +output <- "InfData conf interval test" +ci<-confint(model) +# ci[1,1] = lower 2.5% bound for intercept, ci[1,2] = upper 97.5% for intercept +# ci[2,1] = lower 2.5% bound for slope, ci[2,2] = upper 97.5% for slope +halfWidth <- ci[2,2] - slope +if (assertEquals(0.0270713794287, halfWidth, tol, + "Slope conf. interval half-width")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} +#------------------------------------------------------------------------------ +# Norris dataset from NIST examples + +y <- c(0.1, 338.8, 118.1, 888.0, 9.2, 228.1, 668.5, 998.5, 449.1, 778.9, 559.2, +0.3, 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, 557.7, 228.3, 998.0, 888.8, 119.6, +0.3, 0.6, 557.6, 339.3, 888.0, 998.5, 778.9, 10.2, 117.6, 228.9, 668.4, 449.2, +0.2) +x <- c(0.2, 337.4, 118.2, 884.6, 10.1, 226.5, 666.3, 996.3, 448.6, 777.0, 558.2, +0.4, 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, 556.0, 228.1, 995.8, 887.6, 120.2, +0.3, 0.3, 556.8, 339.1, 887.2, 999.0, 779.0, 11.1, 118.3, 229.2, 669.1, 448.9, +0.5) +model<-lm(y~x) +coef <- coefficients(summary(model)) +intercept <- coef[1, 1] +interceptStd <- coef[1, 2] +slope <- coef[2, 1] +slopeStd <- coef[2, 2] + +output <- "Norris std error test" +if (assertEquals(0.429796848199937E-03, slopeStd, tol, "Slope Standard Error") + && assertEquals(0.232818234301152, interceptStd, tol, + "Intercept Standard Error")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} +#------------------------------------------------------------------------------ +# infData2 -- bad fit example +# +x <- c(1,2,3,4,5,6) +y <- c(1,0,5,2,-1,12) +model<-lm(y~x) +coef <- coefficients(summary(model)) +intercept <- coef[1, 1] +interceptStd <- coef[1, 2] +slope <- coef[2, 1] +slopeStd <- coef[2, 2] +significance <- coef[2, 4] + +output <- "InfData2 std error test" +if (assertEquals(1.07260253, slopeStd, tol, "Slope Standard Error") && + assertEquals(4.17718672, interceptStd, tol, "Intercept Standard Error")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +output <- "InfData2 significance test" +if (assertEquals(0.261829133982, significance, tol, "Significance")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +output <- "InfData2 conf interval test" +ci<-confint(model) +# ci[1,1] = lower 2.5% bound for intercept, ci[1,2] = upper 97.5% for intercept +# ci[2,1] = lower 2.5% bound for slope, ci[2,2] = upper 97.5% for slope +halfWidth <- ci[2,2] - slope +if (assertEquals(2.97802204827, halfWidth, tol, + "Slope conf. interval half-width")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} +#------------------------------------------------------------------------------ +# Correlation example + +x <- c(101.0, 100.1, 100.0, 90.6, 86.5, 89.7, 90.6, 82.8, 70.1, 65.4, + 61.3, 62.5, 63.6, 52.6, 59.7, 59.5, 61.3) +y <- c(99.2, 99.0, 100.0, 111.6, 122.2, 117.6, 121.1, 136.0, 154.2, 153.6, + 158.5, 140.6, 136.2, 168.0, 154.3, 149.0, 165.5) + +output <- "Correlation test" +if (assertEquals(-0.94663767742, cor(x,y, method="pearson"), tol, + "Correlation coefficient")) { + displayPadded(output, SUCCEEDED, WIDTH) +} else { + displayPadded(output, FAILED, WIDTH) +} + +displayDashes(WIDTH) + + + + diff --git a/src/experimental/R/testAll b/src/experimental/R/testAll new file mode 100644 index 000000000..702ab7afb --- /dev/null +++ b/src/experimental/R/testAll @@ -0,0 +1,45 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to run all commons-math R verification tests +# +# To run the test, install R, put this file and all other o.a.c.math R +# verification tests and the testfunctions utilities file into the same +# directory, launch R from this directory and then enter +# source("") +# +# To redirect output to a file, uncomment the following line, substituting +# another file path if you like (default behavior is to write the file to the +# current directory). +# +# sink("testResults") +#------------------------------------------------------------------------------ +# distribution +source("binomialTestCases") +source("normalTestCases") +source("poissonTestCases") +source("hypergeometricTestCases") +source("exponentialTestCases") + +# regression +source("regressionTestCases") + +# inference +source("chiSquareTestCases") +#------------------------------------------------------------------------------ +# if output has been diverted, change it back +if (sink.number()) { + sink() +} diff --git a/src/experimental/R/testFunctions b/src/experimental/R/testFunctions new file mode 100644 index 000000000..d60f32c69 --- /dev/null +++ b/src/experimental/R/testFunctions @@ -0,0 +1,66 @@ +# Copyright 2004 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# +# Utility functions used in R comparison tests. +# +#------------------------------------------------------------------------------ +# Global constants +#------------------------------------------------------------------------------ +WIDTH <- 80 # screen size constant for display functions +SUCCEEDED <- "SUCCEEDED" +FAILED <- "FAILED" +options(digits=12) # display 12 digits throughout +#------------------------------------------------------------------------------ +# Comparison functions +#------------------------------------------------------------------------------ +# Tests to see if and are within of +# one another in the sup norm. +# +# Returns 1 if no pair of corresponding entries differs by more than abs; +# otherwise displays and returns 0. +# Works for both vectors and scalar values. +# +assertEquals <- function(expected, observed, tol, message) { + if(any(abs(expected - observed) > tol)) { + cat("FAILURE: ",message,"\n") + cat("EXPECTED: ",expected,"\n") + cat("OBSERVED: ",observed,"\n") + return(0) + } else { + return(1) + } +} +#------------------------------------------------------------------------------ +# Display functions +#------------------------------------------------------------------------------ +# Displays n-col dashed line. +# +displayDashes <- function(n) { + cat(rep("-",n),"\n",sep='') + return(1) +} +#------------------------------------------------------------------------------ +# Displays ...... with enough dots in between to make cols, +# followed by a new line character. Blows up if is longer than +# cols by itself. +# +# Expects and to be strings (character vectors). +# +displayPadded <- function(start, end, n) { + len = sum(nchar(start)) + sum(nchar(end)) + cat(start, rep(".", n - len), end, "\n",sep='') + return(1) +}