From ce131449a4ca0b06c6bb27ee5d8d6d89b1467bbd Mon Sep 17 00:00:00 2001 From: Gilles Date: Mon, 14 Sep 2015 02:08:30 +0200 Subject: [PATCH] MATH-1270 Various SOFM visualizations: unified distance matrix, hit histogram, smoothed data histogram, topographic error. --- .../ml/neuralnet/twod/util/HitHistogram.java | 85 ++++++++ .../twod/util/SmoothedDataHistogram.java | 97 +++++++++ .../twod/util/TopographicErrorHistogram.java | 91 ++++++++ .../twod/util/UnifiedDistanceMatrix.java | 203 ++++++++++++++++++ 4 files changed, 476 insertions(+) create mode 100644 src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java create mode 100644 src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java create mode 100644 src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java create mode 100644 src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java new file mode 100644 index 000000000..536acfc55 --- /dev/null +++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/HitHistogram.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+
+/**
+ * Computes the hit histogram.
+ * Each bin will contain the number of data for which the corresponding
+ * neuron is the best matching unit.
+ */
+public class HitHistogram implements MapDataVisualization {
+    /** Distance used to find the best matching unit. */
+    private final DistanceMeasure distance;
+    /** Whether to compute relative bin counts. */
+    private final boolean normalizeCount;
+
+    /**
+     * @param normalizeCount Whether to compute relative bin counts.
+     * If {@code true}, the data count in each bin will be divided by the total
+     * number of samples.
+     * @param distance Distance used to find the best matching unit.
+     */
+    public HitHistogram(boolean normalizeCount,
+                        DistanceMeasure distance) {
+        this.normalizeCount = normalizeCount;
+        this.distance = distance;
+    }
+
+    /**
+     * {@inheritDoc}
+     * If {@code data} is empty, all bins are zero, whether or not relative
+     * counts were requested.
+     */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int nR = map.getNumberOfRows();
+        final int nC = map.getNumberOfColumns();
+
+        final LocationFinder finder = new LocationFinder(map);
+
+        // Total number of samples.
+        int numSamples = 0;
+        // Hit bins.
+        final double[][] hit = new double[nR][nC];
+
+        for (double[] sample : data) {
+            final Neuron best = MapUtils.findBest(sample, map, distance);
+            final LocationFinder.Location loc = finder.getLocation(best);
+            hit[loc.getRow()][loc.getColumn()] += 1;
+            ++numSamples;
+        }
+
+        // Skip the normalization when there are no samples (avoids 0/0 = NaN).
+        if (normalizeCount && numSamples > 0) {
+            for (int r = 0; r < nR; r++) {
+                for (int c = 0; c < nC; c++) {
+                    hit[r][c] /= numSamples;
+                }
+            }
+        }
+
+        return hit;
+    }
+}
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java
new file mode 100644
index 000000000..bdab57010
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/SmoothedDataHistogram.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+import org.apache.commons.math4.exception.NumberIsTooSmallException;
+
+/**
+ * Visualization of high-dimensional data projection on a 2D-map.
+ * The method is described in
+ * <blockquote>
+ *  <em>Using Smoothed Data Histograms for Cluster Visualization in Self-Organizing Maps</em>
+ *  <br>
+ *  by Elias Pampalk, Andreas Rauber and Dieter Merkl.
+ * </blockquote>
+ */
+public class SmoothedDataHistogram implements MapDataVisualization {
+    /** Smoothing parameter. */
+    private final int smoothingBins;
+    /** Distance. */
+    private final DistanceMeasure distance;
+    /** Normalization factor: inverse of the sum of the rank weights. */
+    private final double membershipNormalization;
+
+    /**
+     * @param smoothingBins Number of bins updated for each sample.
+     * @param distance Distance.
+     */
+    public SmoothedDataHistogram(int smoothingBins,
+                                 DistanceMeasure distance) {
+        this.smoothingBins = smoothingBins;
+        this.distance = distance;
+
+        // Sum of the rank weights: smoothingBins + ... + 2 + 1.
+        double sum = 0;
+        for (int i = 0; i < smoothingBins; i++) {
+            sum += smoothingBins - i;
+        }
+        this.membershipNormalization = 1d / sum;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws NumberIsTooSmallException if the size of the {@code map}
+     * is smaller than the number of {@link #SmoothedDataHistogram(int,DistanceMeasure)
+     * smoothing bins}.
+     */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int nR = map.getNumberOfRows();
+        final int nC = map.getNumberOfColumns();
+
+        final int mapSize = nR * nC;
+        if (mapSize < smoothingBins) {
+            throw new NumberIsTooSmallException(mapSize, smoothingBins, true);
+        }
+
+        final LocationFinder finder = new LocationFinder(map);
+
+        // Histogram bins.
+        final double[][] histo = new double[nR][nC];
+
+        for (double[] sample : data) {
+            final Neuron[] sorted = MapUtils.sort(sample,
+                                                  map.getNetwork(),
+                                                  distance);
+            // The neuron at rank i receives the weight (smoothingBins - i), scaled
+            // so that the contributions of a single sample add up to 1.
+            for (int i = 0; i < smoothingBins; i++) {
+                final LocationFinder.Location loc = finder.getLocation(sorted[i]);
+                histo[loc.getRow()][loc.getColumn()] += (smoothingBins - i) * membershipNormalization;
+            }
+        }
+
+        return histo;
+    }
+}
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java
new file mode 100644
index 000000000..0543ce04b
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/TopographicErrorHistogram.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import org.apache.commons.math4.ml.neuralnet.MapUtils;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+import org.apache.commons.math4.util.Pair;
+
+/**
+ * Computes the topographic error histogram.
+ * Each bin will contain the number of data for which the first and
+ * second best matching units are not adjacent in the map.
+ */
+public class TopographicErrorHistogram implements MapDataVisualization {
+    /** Distance. */
+    private final DistanceMeasure distance;
+    /** Whether to compute relative bin counts. */
+    private final boolean relativeCount;
+
+    /**
+     * @param relativeCount Whether to compute relative bin counts.
+     * If {@code true}, the data count in each bin will be divided by the total
+     * number of samples mapped to the neuron represented by that bin.
+     * @param distance Distance.
+     */
+    public TopographicErrorHistogram(boolean relativeCount,
+                                     DistanceMeasure distance) {
+        this.relativeCount = relativeCount;
+        this.distance = distance;
+    }
+
+    /**
+     * {@inheritDoc}
+     * With relative counts, bins of neurons without any hit are left at zero.
+     */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map,
+                                   Iterable<double[]> data) {
+        final int nR = map.getNumberOfRows();
+        final int nC = map.getNumberOfColumns();
+        final Network net = map.getNetwork();
+        final LocationFinder finder = new LocationFinder(map);
+
+        // Per-neuron number of samples, and number of topographic errors.
+        final int[][] hit = new int[nR][nC];
+        final double[][] error = new double[nR][nC];
+
+        for (double[] sample : data) {
+            final Pair<Neuron, Neuron> p = MapUtils.findBestAndSecondBest(sample, map, distance);
+            final Neuron best = p.getFirst();
+            final LocationFinder.Location loc = finder.getLocation(best);
+            final int row = loc.getRow();
+            final int col = loc.getColumn();
+            hit[row][col] += 1;
+            // Count an error when the two best matching units are not neighbours.
+            if (!net.getNeighbours(best).contains(p.getSecond())) {
+                error[row][col] += 1;
+            }
+        }
+
+        if (relativeCount) {
+            for (int r = 0; r < nR; r++) {
+                for (int c = 0; c < nC; c++) {
+                    if (hit[r][c] != 0) { // Avoid 0/0 (NaN) for bins without hits.
+                        error[r][c] /= hit[r][c];
+                    }
+                }
+            }
+        }
+
+        return error;
+    }
+}
diff --git a/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java
new file mode 100644
index 000000000..4b6f67a1c
--- /dev/null
+++ b/src/main/java/org/apache/commons/math4/ml/neuralnet/twod/util/UnifiedDistanceMatrix.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math4.ml.neuralnet.twod.util;
+
+import java.util.Collection;
+import org.apache.commons.math4.ml.neuralnet.Neuron;
+import org.apache.commons.math4.ml.neuralnet.Network;
+import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
+import org.apache.commons.math4.ml.distance.DistanceMeasure;
+
+/**
+ * U-Matrix (unified distance matrix)
+ * visualization of high-dimensional data projection.
+ */
+public class UnifiedDistanceMatrix implements MapVisualization {
+    /** Whether to show distance between each pair of neighbouring units. */
+    private final boolean individualDistances;
+    /** Distance. */
+    private final DistanceMeasure distance;
+
+    /** Simple constructor.
+     * @param individualDistances If {@code true}, the 8 individual
+     * inter-units distances will be {@link #computeImage(NeuronSquareMesh2D)
+     * computed}.  They will be stored in additional pixels around each of
+     * the original units of the 2D-map.  The value zero will be stored in the
+     * pixel corresponding to the location of a unit of the 2D-map.
+     * If {@code false}, only the average distance between a unit and all its
+     * neighbours will be computed (and stored in the pixel corresponding to
+     * that unit of the 2D-map).  In that case, the number of neighbours taken
+     * into account depends on the network's
+     * {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
+     * neighbourhood type}.
+     * @param distance Distance.
+     */
+    public UnifiedDistanceMatrix(boolean individualDistances,
+                                 DistanceMeasure distance) {
+        this.individualDistances = individualDistances;
+        this.distance = distance;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double[][] computeImage(NeuronSquareMesh2D map) {
+        return individualDistances ? individualDistances(map) : averageDistances(map);
+    }
+
+    /**
+     * Computes the distances between a unit of the map and its
+     * neighbours.
+     * The image has {@code 2 * rows + 1} by {@code 2 * columns + 1} pixels:
+     * the unit at {@code (i, j)} is located at pixel {@code (2i + 1, 2j + 1)},
+     * which keeps the value zero, and the distances to its 8 neighbours are
+     * stored in the 8 surrounding pixels.
+     * Surrounding pixels are shared between adjacent units: a pixel holds
+     * the value that was written last (units are visited row by row), or
+     * zero if no unit sharing it has a neighbour in that direction.
+     *
+     * @param map Map.
+     * @return an image representing the individual distances.
+     */
+    private double[][] individualDistances(NeuronSquareMesh2D map) {
+        final int numRows = map.getNumberOfRows();
+        final int numCols = map.getNumberOfColumns();
+
+        final double[][] uMatrix = new double[numRows * 2 + 1][numCols * 2 + 1];
+
+        for (int i = 0; i < numRows; i++) {
+            // Current unit's row index in result image.
+            final int iR = 2 * i + 1;
+
+            for (int j = 0; j < numCols; j++) {
+                // Current unit's column index in result image.
+                final int jR = 2 * j + 1;
+
+                final double[] current = map.getNeuron(i, j).getFeatures();
+                Neuron neighbour;
+
+                // Top-left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR - 1] = distance.compute(current,
+                                                               neighbour.getFeatures());
+                }
+
+                // Top-center neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.CENTER,
+                                          NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR] = distance.compute(current,
+                                                           neighbour.getFeatures());
+                }
+
+                // Top-right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          NeuronSquareMesh2D.VerticalDirection.UP);
+                if (neighbour != null) {
+                    uMatrix[iR - 1][jR + 1] = distance.compute(current,
+                                                               neighbour.getFeatures());
+                }
+
+                // Left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          NeuronSquareMesh2D.VerticalDirection.CENTER);
+                if (neighbour != null) {
+                    uMatrix[iR][jR - 1] = distance.compute(current,
+                                                           neighbour.getFeatures());
+                }
+
+                // Right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          NeuronSquareMesh2D.VerticalDirection.CENTER);
+                if (neighbour != null) {
+                    uMatrix[iR][jR + 1] = distance.compute(current,
+                                                           neighbour.getFeatures());
+                }
+
+                // Bottom-left neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.LEFT,
+                                          NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR - 1] = distance.compute(current,
+                                                               neighbour.getFeatures());
+                }
+
+                // Bottom-center neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.CENTER,
+                                          NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR] = distance.compute(current,
+                                                           neighbour.getFeatures());
+                }
+
+                // Bottom-right neighbour.
+                neighbour = map.getNeuron(i, j,
+                                          NeuronSquareMesh2D.HorizontalDirection.RIGHT,
+                                          NeuronSquareMesh2D.VerticalDirection.DOWN);
+                if (neighbour != null) {
+                    uMatrix[iR + 1][jR + 1] = distance.compute(current,
+                                                               neighbour.getFeatures());
+                }
+            }
+        }
+
+        return uMatrix;
+    }
+
+    /**
+     * Computes, for each unit of the map, the average distance between
+     * that unit and its neighbours, as given by the map's network.
+     * A unit without neighbours is assigned the value zero.
+     *
+     * @param map Map.
+     * @return an image representing the average distances.
+     */
+    private double[][] averageDistances(NeuronSquareMesh2D map) {
+        final int numRows = map.getNumberOfRows();
+        final int numCols = map.getNumberOfColumns();
+        final double[][] uMatrix = new double[numRows][numCols];
+
+        final Network net = map.getNetwork();
+
+        for (int i = 0; i < numRows; i++) {
+            for (int j = 0; j < numCols; j++) {
+                final Neuron neuron = map.getNeuron(i, j);
+                final Collection<Neuron> neighbours = net.getNeighbours(neuron);
+                final double[] features = neuron.getFeatures();
+
+                double d = 0;
+                for (Neuron n : neighbours) {
+                    d += distance.compute(features, n.getFeatures());
+                }
+
+                // No neighbour: store zero rather than 0/0 (NaN).
+                uMatrix[i][j] = neighbours.isEmpty() ? 0 : d / neighbours.size();
+            }
+        }
+
+        return uMatrix;
+    }
+}