diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2D.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2D.java
index 3a5a126a2..17055da51 100644
--- a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2D.java
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2D.java
@@ -20,6 +20,7 @@ package org.apache.commons.math4.ml.neuralnet.twod;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.Collection;
 import java.io.Serializable;
 import java.io.ObjectInputStream;
 
@@ -30,6 +31,10 @@ import org.apache.commons.math4.ml.neuralnet.FeatureInitializer;
 import org.apache.commons.math4.ml.neuralnet.Network;
 import org.apache.commons.math4.ml.neuralnet.Neuron;
 import org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood;
+import org.apache.commons.math4.ml.neuralnet.MapRanking;
+import org.apache.commons.math4.ml.neuralnet.twod.util.LocationFinder;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+import org.apache.commons.math4.ml.distance.EuclideanDistance;
 
 /**
  * Neural network with the topology of a two-dimensional surface.
@@ -339,6 +344,17 @@ public class NeuronSquareMesh2D
         return location == null ? null : getNeuron(location[0], location[1]);
     }
 
+    /**
+     * Computes various {@link DataVisualization indicators} of the quality
+     * of the representation of the given {@code data} by a copy of this map.
+     *
+     * @param data Features of the samples, one {@code double[]} per sample.
+     * @return a new instance holding quality indicators.
+     */
+    public DataVisualization computeQualityIndicators(Iterable<double[]> data) {
+        return DataVisualization.from(copy(), data);
+    }
+
     /**
      * Computes the location of a neighbouring neuron.
* Returns {@code null} if the resulting location is not part @@ -625,4 +641,227 @@ public class NeuronSquareMesh2D featuresList); } } + + /** + * Miscellaneous indicators of the map quality: + * + */ + public static class DataVisualization { + /** Distance function. */ + private static final DistanceMeasure DISTANCE = new EuclideanDistance(); + /** Total number of samples. */ + private final int numberOfSamples; + /** Hit histogram. */ + private final double[][] hitHistogram; + /** Quantization error. */ + private final double[][] quantizationError; + /** Mean quantization error. */ + private final double meanQuantizationError; + /** Topographic error. */ + private final double[][] topographicError; + /** Mean topographic error. */ + private final double meanTopographicError; + /** U-matrix. */ + private final double[][] uMatrix; + + /** + * @param numberOfSamples Number of samples. + * @param hitHistogram Hit histogram. + * @param quantizationError Quantization error. + * @param topographicError Topographic error. + * @param uMatrix U-matrix. + */ + private DataVisualization(int numberOfSamples, + double[][] hitHistogram, + double[][] quantizationError, + double[][] topographicError, + double[][] uMatrix) { + this.numberOfSamples = numberOfSamples; + this.hitHistogram = hitHistogram; + this.quantizationError = quantizationError; + meanQuantizationError = hitWeightedMean(quantizationError, hitHistogram); + this.topographicError = topographicError; + meanTopographicError = hitWeightedMean(topographicError, hitHistogram); + this.uMatrix = uMatrix; + } + + /** + * @param map Map + * @param data Data. + * @return the metrics. + */ + static DataVisualization from(NeuronSquareMesh2D map, + Iterable data) { + final LocationFinder finder = new LocationFinder(map); + final MapRanking rank = new MapRanking(map, DISTANCE); + final Network net = map.getNetwork(); + final int nR = map.getNumberOfRows(); + final int nC = map.getNumberOfColumns(); + + // Hit bins. 
+ final int[][] hitCounter = new int[nR][nC]; + // Hit bins. + final double[][] hitHistogram = new double[nR][nC]; + // Quantization error bins. + final double[][] quantizationError = new double[nR][nC]; + // Topographic error bins. + final double[][] topographicError = new double[nR][nC]; + // U-matrix. + final double[][] uMatrix = new double[nR][nC]; + + int numSamples = 0; + for (double[] sample : data) { + ++numSamples; + + final List winners = rank.rank(sample, 2); + final Neuron best = winners.get(0); + final Neuron secondBest = winners.get(1); + + final LocationFinder.Location locBest = finder.getLocation(best); + final int rowBest = locBest.getRow(); + final int colBest = locBest.getColumn(); + // Increment hit counter. + hitCounter[rowBest][colBest] += 1; + + // Aggregate quantization error. + quantizationError[rowBest][colBest] += DISTANCE.compute(sample, best.getFeatures()); + + // Aggregate topographic error. + if (!net.getNeighbours(best).contains(secondBest)) { + // Increment count if first and second best matching units + // are not neighbours. + topographicError[rowBest][colBest] += 1; + } + } + + for (int r = 0; r < nR; r++) { + for (int c = 0; c < nC; c++) { + final Neuron neuron = map.getNeuron(r, c); + final Collection neighbours = net.getNeighbours(neuron); + final double[] features = neuron.getFeatures(); + double uDistance = 0; + int neighbourCount = 0; + for (Neuron n : neighbours) { + ++neighbourCount; + uDistance += DISTANCE.compute(features, n.getFeatures()); + } + + final int hitCount = hitCounter[r][c]; + if (hitCount != 0) { + hitHistogram[r][c] = hitCount / (double) numSamples; + quantizationError[r][c] /= hitCount; + topographicError[r][c] /= hitCount; + uMatrix[r][c] = uDistance / neighbourCount; + } + } + } + + return new DataVisualization(numSamples, + hitHistogram, + quantizationError, + topographicError, + uMatrix); + } + + /** + * @return the total number of samples. 
+ */ + public final int getNumberOfSamples() { + return numberOfSamples; + } + + /** + * @return the quantization error. + * Each bin will contain the average of the distances between samples + * mapped to the corresponding unit and the weight vector of that unit. + * @see #getMeanQuantizationError() + */ + public double[][] getQuantizationError() { + return copy(quantizationError); + } + + /** + * @return the topographic error. + * Each bin will contain the number of data for which the first and + * second best matching units are not adjacent in the map. + * @see #getMeanTopographicError() + */ + public double[][] getTopographicError() { + return copy(topographicError); + } + + /** + * @return the hits histogram (normalized). + * Each bin will contain the number of data for which the corresponding + * neuron is the best matching unit. + */ + public double[][] getNormalizedHits() { + return copy(hitHistogram); + } + + /** + * @return the U-matrix. + * Each bin will contain the average distance between a unit and all its + * neighbours will be computed (and stored in the pixel corresponding to + * that unit of the 2D-map). The number of neighbours taken into account + * depends on the network {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood + * neighbourhood type}. + */ + public double[][] getUMatrix() { + return copy(uMatrix); + } + + /** + * @return the mean (hit-weighted) quantization error. + * @see #getQuantizationError() + */ + public double getMeanQuantizationError() { + return meanQuantizationError; + } + + /** + * @return the mean (hit-weighted) topographic error. + * @see #getTopographicError() + */ + public double getMeanTopographicError() { + return meanTopographicError; + } + + /** + * @param orig Source. + * @return a deep copy of the original array. 
+ */ + private static double[][] copy(double[][] orig) { + final double[][] copy = new double[orig.length][]; + for (int i = 0; i < orig.length; i++) { + copy[i] = orig[i].clone(); + } + + return copy; + } + + /** + * @param metrics Metrics. + * @param normalizedHits Hits histogram (normalized). + * @return the hit-weighted mean of the given {@code metrics}. + */ + private double hitWeightedMean(double[][] metrics, + double[][] normalizedHits) { + double mean = 0; + final int rows = metrics.length; + final int cols = metrics[0].length; + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + mean += normalizedHits[i][j] * metrics[i][j]; + } + } + + return mean; + } + } } diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java deleted file mode 100644 index a88fa1d0c..000000000 --- a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.math4.ml.neuralnet.twod.util; - -import org.apache.commons.math4.ml.neuralnet.MapRanking; -import org.apache.commons.math4.ml.neuralnet.Neuron; -import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D; -import org.apache.commons.math4.ml.distance.DistanceMeasure; - -/** - * Computes the hit histogram. - * Each bin will contain the number of data for which the corresponding - * neuron is the best matching unit. - * @since 3.6 - */ -public class HitHistogram implements MapDataVisualization { - /** Distance. */ - private final DistanceMeasure distance; - /** Whether to compute relative bin counts. */ - private final boolean normalizeCount; - - /** - * @param normalizeCount Whether to compute relative bin counts. - * If {@code true}, the data count in each bin will be divided by the total - * number of samples. - * @param distance Distance. - */ - public HitHistogram(boolean normalizeCount, - DistanceMeasure distance) { - this.normalizeCount = normalizeCount; - this.distance = distance; - } - - /** {@inheritDoc} */ - @Override - public double[][] computeImage(NeuronSquareMesh2D map, - Iterable data) { - final int nR = map.getNumberOfRows(); - final int nC = map.getNumberOfColumns(); - - final LocationFinder finder = new LocationFinder(map); - final MapRanking rank = new MapRanking(map.getNetwork(), distance); - - // Totla number of samples. - int numSamples = 0; - // Hit bins. 
- final double[][] hit = new double[nR][nC]; - - for (double[] sample : data) { - final Neuron best = rank.rank(sample, 1).get(0); - - final LocationFinder.Location loc = finder.getLocation(best); - final int row = loc.getRow(); - final int col = loc.getColumn(); - hit[row][col] += 1; - - ++numSamples; - } - - if (normalizeCount) { - for (int r = 0; r < nR; r++) { - for (int c = 0; c < nC; c++) { - hit[r][c] /= numSamples; - } - } - } - - return hit; - } -} diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/QuantizationError.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/QuantizationError.java deleted file mode 100644 index f2bc9de24..000000000 --- a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/QuantizationError.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.math4.ml.neuralnet.twod.util; - -import org.apache.commons.math4.ml.neuralnet.MapRanking; -import org.apache.commons.math4.ml.neuralnet.Neuron; -import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D; -import org.apache.commons.math4.ml.distance.DistanceMeasure; - -/** - * Computes the quantization error histogram. 
- * Each bin will contain the average of the distances between samples - * mapped to the corresponding unit and the weight vector of that unit. - * @since 3.6 - */ -public class QuantizationError implements MapDataVisualization { - /** Distance. */ - private final DistanceMeasure distance; - - /** - * @param distance Distance. - */ - public QuantizationError(DistanceMeasure distance) { - this.distance = distance; - } - - /** {@inheritDoc} */ - @Override - public double[][] computeImage(NeuronSquareMesh2D map, - Iterable data) { - final int nR = map.getNumberOfRows(); - final int nC = map.getNumberOfColumns(); - - final LocationFinder finder = new LocationFinder(map); - final MapRanking rank = new MapRanking(map.getNetwork(), distance); - - // Hit bins. - final int[][] hit = new int[nR][nC]; - // Error bins. - final double[][] error = new double[nR][nC]; - - for (double[] sample : data) { - final Neuron best = rank.rank(sample, 1).get(0); - - final LocationFinder.Location loc = finder.getLocation(best); - final int row = loc.getRow(); - final int col = loc.getColumn(); - hit[row][col] += 1; - error[row][col] += distance.compute(sample, best.getFeatures()); - } - - for (int r = 0; r < nR; r++) { - for (int c = 0; c < nC; c++) { - final int count = hit[r][c]; - if (count != 0) { - error[r][c] /= count; - } - } - } - - return error; - } -} diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java deleted file mode 100644 index 758e6725f..000000000 --- a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.math4.ml.neuralnet.twod.util; - -import java.util.List; -import org.apache.commons.math4.ml.neuralnet.MapRanking; -import org.apache.commons.math4.ml.neuralnet.Neuron; -import org.apache.commons.math4.ml.neuralnet.Network; -import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D; -import org.apache.commons.math4.ml.distance.DistanceMeasure; - -/** - * Computes the topographic error histogram. - * Each bin will contain the number of data for which the first and - * second best matching units are not adjacent in the map. - * @since 3.6 - */ -public class TopographicErrorHistogram implements MapDataVisualization { - /** Distance. */ - private final DistanceMeasure distance; - /** Whether to compute relative bin counts. */ - private final boolean relativeCount; - - /** - * @param relativeCount Whether to compute relative bin counts. - * If {@code true}, the data count in each bin will be divided by the total - * number of samples mapped to the neuron represented by that bin. - * @param distance Distance. 
- */ - public TopographicErrorHistogram(boolean relativeCount, - DistanceMeasure distance) { - this.relativeCount = relativeCount; - this.distance = distance; - } - - /** {@inheritDoc} */ - @Override - public double[][] computeImage(NeuronSquareMesh2D map, - Iterable data) { - final int nR = map.getNumberOfRows(); - final int nC = map.getNumberOfColumns(); - - final LocationFinder finder = new LocationFinder(map); - final Network net = map.getNetwork(); - final MapRanking rank = new MapRanking(net, distance); - - // Hit bins. - final int[][] hit = new int[nR][nC]; - // Error bins. - final double[][] error = new double[nR][nC]; - - for (double[] sample : data) { - final List p = rank.rank(sample, 2); - final Neuron best = p.get(0); - - final LocationFinder.Location loc = finder.getLocation(best); - final int row = loc.getRow(); - final int col = loc.getColumn(); - hit[row][col] += 1; - - if (!net.getNeighbours(best).contains(p.get(1))) { - // Increment count if first and second best matching units - // are not neighbours. 
- error[row][col] += 1; - } - } - - if (relativeCount) { - for (int r = 0; r < nR; r++) { - for (int c = 0; c < nC; c++) { - error[r][c] /= hit[r][c]; - } - } - } - - return error; - } -} diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java index 0fa20028f..9fa931861 100644 --- a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java +++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java @@ -17,59 +17,36 @@ package org.apache.commons.math4.ml.neuralnet.twod.util; -import java.util.Collection; import org.apache.commons.math4.ml.neuralnet.Neuron; -import org.apache.commons.math4.ml.neuralnet.Network; import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D; import org.apache.commons.math4.ml.distance.DistanceMeasure; /** * U-Matrix * visualization of high-dimensional data projection. + * The 8 individual inter-units distances will be + * {@link #computeImage(NeuronSquareMesh2D) computed}. They will be + * stored in additional pixels around each of the original units of the + * 2D-map. The additional pixels that lie along a "diagonal" are shared + * by two pairs of units: their value will be set to the average + * distance between the units belonging to each of the pairs. The value + * zero will be stored in the pixel corresponding to the location of a + * unit of the 2D-map. + * * @since 3.6 + * @see NeuronSquareMesh2D.DataVisualization#getUMatrix() */ public class UnifiedDistanceMatrix implements MapVisualization { - /** Whether to show distance between each pair of neighbouring units. */ - private final boolean individualDistances; /** Distance. */ private final DistanceMeasure distance; /** - * Simple constructor. 
- * - * @param individualDistances If {@code true}, the 8 individual - * inter-units distances will be {@link #computeImage(NeuronSquareMesh2D) - * computed}. They will be stored in additional pixels around each of - * the original units of the 2D-map. The additional pixels that lie - * along a "diagonal" are shared by two pairs of units: their - * value will be set to the average distance between the units belonging - * to each of the pairs. The value zero will be stored in the pixel - * corresponding to the location of a unit of the 2D-map. - *
- * If {@code false}, only the average distance between a unit and all its - * neighbours will be computed (and stored in the pixel corresponding to - * that unit of the 2D-map). In that case, the number of neighbours taken - * into account depends on the network's - * {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood - * neighbourhood type}. * @param distance Distance. */ - public UnifiedDistanceMatrix(boolean individualDistances, - DistanceMeasure distance) { - this.individualDistances = individualDistances; + public UnifiedDistanceMatrix(DistanceMeasure distance) { this.distance = distance; } - /** {@inheritDoc} */ - @Override - public double[][] computeImage(NeuronSquareMesh2D map) { - if (individualDistances) { - return individualDistances(map); - } else { - return averageDistances(map); - } - } - /** * Computes the distances between a unit of the map and its * neighbours. @@ -81,7 +58,8 @@ public class UnifiedDistanceMatrix implements MapVisualization { * @param map Map. * @return an image representing the individual distances. */ - private double[][] individualDistances(NeuronSquareMesh2D map) { + @Override + public double[][] computeImage(NeuronSquareMesh2D map) { final int numRows = map.getNumberOfRows(); final int numCols = map.getNumberOfColumns(); @@ -174,37 +152,4 @@ public class UnifiedDistanceMatrix implements MapVisualization { return uMatrix; } - - /** - * Computes the distances between a unit of the map and its neighbours. - * - * @param map Map. - * @return an image representing the average distances. 
-     */
-    private double[][] averageDistances(NeuronSquareMesh2D map) {
-        final int numRows = map.getNumberOfRows();
-        final int numCols = map.getNumberOfColumns();
-        final double[][] uMatrix = new double[numRows][numCols];
-
-        final Network net = map.getNetwork();
-
-        for (int i = 0; i < numRows; i++) {
-            for (int j = 0; j < numCols; j++) {
-                final Neuron neuron = map.getNeuron(i, j);
-                final Collection neighbours = net.getNeighbours(neuron);
-                final double[] features = neuron.getFeatures();
-
-                double d = 0;
-                int count = 0;
-                for (Neuron n : neighbours) {
-                    ++count;
-                    d += distance.compute(features, n.getFeatures());
-                }
-
-                uMatrix[i][j] = d / count;
-            }
-        }
-
-        return uMatrix;
-    }
 }
diff --git a/src/test/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2DTest.java b/src/test/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2DTest.java
index 693f59b9e..55732a75f 100644
--- a/src/test/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2DTest.java
+++ b/src/test/java/org/apache/commons/math4/ml/neuralnet/twod/NeuronSquareMesh2DTest.java
@@ -25,6 +25,9 @@ import java.io.ObjectOutputStream;
 import java.util.Collection;
 import java.util.Set;
 import java.util.HashSet;
+import java.util.List;
+import java.util.stream.StreamSupport;
+import java.util.stream.Collectors;
 
 import org.apache.commons.math4.exception.NumberIsTooSmallException;
 import org.apache.commons.math4.exception.OutOfRangeException;
@@ -872,4 +875,45 @@ public class NeuronSquareMesh2DTest {
             Assert.assertTrue(fromMap.contains(n));
         }
     }
+
+    @Test
+    public void testDataVisualization() {
+        final FeatureInitializer[] initArray = { init };
+        final NeuronSquareMesh2D map = new NeuronSquareMesh2D(3, true,
+                                                              3, true,
+                                                              SquareNeighbourhood.VON_NEUMANN,
+                                                              initArray);
+
+        // Trivial test: Use neurons' features as data.
+
+        final List<double[]> data = StreamSupport.stream(map.spliterator(), false)
+            .map(n -> n.getFeatures())
+            .collect(Collectors.toList());
+        final NeuronSquareMesh2D.DataVisualization v = map.computeQualityIndicators(data);
+
+        final int numRows = map.getNumberOfRows();
+        final int numCols = map.getNumberOfColumns();
+
+        // Test number of samples: one sample per neuron.
+        Assert.assertEquals(numRows * numCols, v.getNumberOfSamples());
+
+        // Test hits.
+        final double[][] hits = v.getNormalizedHits();
+        final double expectedHits = 1d / (numRows * numCols);
+        for (int i = 0; i < numRows; i++) {
+            for (int j = 0; j < numCols; j++) {
+                Assert.assertEquals(expectedHits, hits[i][j], 0d);
+            }
+        }
+
+        // Test quantization error.
+        final double[][] qe = v.getQuantizationError();
+        final double expectedQE = 0;
+        for (int i = 0; i < numRows; i++) {
+            for (int j = 0; j < numCols; j++) {
+                Assert.assertEquals(expectedQE, qe[i][j], 0d);
+            }
+        }
+        Assert.assertEquals(expectedQE, v.getMeanQuantizationError(), 0d);
+    }
 }