MATH-1270

Various SOFM visualizations: unified distance matrix, hit histogram,
smoothed data histogram, topographic error.
This commit is contained in:
Gilles 2015-09-14 02:08:30 +02:00
parent 999761ba1b
commit ce131449a4
4 changed files with 476 additions and 0 deletions

View File

@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.ml.neuralnet.twod.util;
import org.apache.commons.math4.ml.neuralnet.MapUtils;
import org.apache.commons.math4.ml.neuralnet.Neuron;
import org.apache.commons.math4.ml.neuralnet.Network;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
/**
 * Computes the hit histogram.
 * Each bin will contain the number of data for which the corresponding
 * neuron is the best matching unit.
 */
public class HitHistogram implements MapDataVisualization {
    /** Distance. */
    private final DistanceMeasure distance;
    /** Whether to compute relative bin counts. */
    private final boolean normalizeCount;

    /**
     * @param normalizeCount Whether to compute relative bin counts.
     * If {@code true}, the data count in each bin will be divided by the total
     * number of samples.
     * @param distance Distance.
     */
    public HitHistogram(boolean normalizeCount,
                        DistanceMeasure distance) {
        this.normalizeCount = normalizeCount;
        this.distance = distance;
    }

    /**
     * {@inheritDoc}
     *
     * The returned image has one pixel per unit of the {@code map}
     * (rows x columns). When normalization was requested and {@code data}
     * contains no sample, the bins are left at zero rather than being
     * divided by a zero sample count.
     */
    @Override
    public double[][] computeImage(NeuronSquareMesh2D map,
                                   Iterable<double[]> data) {
        final int nR = map.getNumberOfRows();
        final int nC = map.getNumberOfColumns();
        final LocationFinder finder = new LocationFinder(map);

        // Total number of samples.
        int numSamples = 0;
        // Hit bins.
        final double[][] hit = new double[nR][nC];

        for (double[] sample : data) {
            final Neuron best = MapUtils.findBest(sample, map, distance);
            final LocationFinder.Location loc = finder.getLocation(best);
            hit[loc.getRow()][loc.getColumn()] += 1;
            ++numSamples;
        }

        // With no sample, "0.0 / 0" would turn every bin into NaN:
        // keep the (all-zero) counts in that case.
        if (normalizeCount && numSamples > 0) {
            for (int r = 0; r < nR; r++) {
                for (int c = 0; c < nC; c++) {
                    hit[r][c] /= numSamples;
                }
            }
        }

        return hit;
    }
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.ml.neuralnet.twod.util;
import org.apache.commons.math4.ml.neuralnet.MapUtils;
import org.apache.commons.math4.ml.neuralnet.Neuron;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
import org.apache.commons.math4.exception.NumberIsTooSmallException;
/**
 * Visualization of high-dimensional data projection on a 2D-map.
 * The method is described in
 * <blockquote>
 *  <em>Using Smoothed Data Histograms for Cluster Visualization in Self-Organizing Maps</em>
 *  <br>
 *  by Elias Pampalk, Andreas Rauber and Dieter Merkl.
 * </blockquote>
 */
public class SmoothedDataHistogram implements MapDataVisualization {
    /** Smoothing parameter. */
    private final int smoothingBins;
    /** Distance. */
    private final DistanceMeasure distance;
    /** Normalization factor: inverse of the sum of the per-rank weights. */
    private final double membershipNormalization;

    /**
     * @param smoothingBins Number of bins: for each sample, this is the
     * number of neurons (taken from the start of the sorted list) whose
     * bin is incremented.
     * @param distance Distance.
     */
    public SmoothedDataHistogram(int smoothingBins,
                                 DistanceMeasure distance) {
        this.smoothingBins = smoothingBins;
        this.distance = distance;

        // Weights are smoothingBins, smoothingBins - 1, ..., 1.
        double total = 0;
        for (int weight = smoothingBins; weight > 0; weight--) {
            total += weight;
        }

        this.membershipNormalization = 1d / total;
    }

    /**
     * {@inheritDoc}
     *
     * For every sample, the first {@code smoothingBins} neurons returned by
     * {@link MapUtils#sort(double[],Iterable,DistanceMeasure) MapUtils.sort}
     * have their bin incremented by a weight that decreases linearly with
     * the rank; the weights attributed for one sample add up to one.
     *
     * @throws NumberIsTooSmallException if the size of the {@code map}
     * is smaller than the number of {@link #SmoothedDataHistogram(int,DistanceMeasure)
     * smoothing bins}.
     */
    @Override
    public double[][] computeImage(NeuronSquareMesh2D map,
                                   Iterable<double[]> data) {
        final int rows = map.getNumberOfRows();
        final int cols = map.getNumberOfColumns();

        final int numNeurons = rows * cols;
        if (smoothingBins > numNeurons) {
            throw new NumberIsTooSmallException(numNeurons, smoothingBins, true);
        }

        final LocationFinder locator = new LocationFinder(map);

        // Histogram bins.
        final double[][] image = new double[rows][cols];

        for (double[] point : data) {
            final Neuron[] ranked = MapUtils.sort(point, map.getNetwork(), distance);

            // "weight" is always "smoothingBins - rank".
            for (int rank = 0, weight = smoothingBins; weight > 0; rank++, weight--) {
                final LocationFinder.Location where = locator.getLocation(ranked[rank]);
                image[where.getRow()][where.getColumn()] += weight * membershipNormalization;
            }
        }

        return image;
    }
}

View File

@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.ml.neuralnet.twod.util;
import org.apache.commons.math4.ml.neuralnet.MapUtils;
import org.apache.commons.math4.ml.neuralnet.Neuron;
import org.apache.commons.math4.ml.neuralnet.Network;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
import org.apache.commons.math4.util.Pair;
/**
 * Computes the topographic error histogram.
 * Each bin will contain the number of data for which the first and
 * second best matching units are not adjacent in the map.
 */
public class TopographicErrorHistogram implements MapDataVisualization {
    /** Distance. */
    private final DistanceMeasure distance;
    /** Whether to compute relative bin counts. */
    private final boolean relativeCount;

    /**
     * @param relativeCount Whether to compute relative bin counts.
     * If {@code true}, the data count in each bin will be divided by the total
     * number of samples mapped to the neuron represented by that bin.
     * @param distance Distance.
     */
    public TopographicErrorHistogram(boolean relativeCount,
                                     DistanceMeasure distance) {
        this.relativeCount = relativeCount;
        this.distance = distance;
    }

    /**
     * {@inheritDoc}
     *
     * When relative counts were requested, a bin whose neuron was never
     * the best matching unit is computed as {@code 0.0 / 0} and thus
     * holds {@code NaN}.
     */
    @Override
    public double[][] computeImage(NeuronSquareMesh2D map,
                                   Iterable<double[]> data) {
        final int rows = map.getNumberOfRows();
        final int cols = map.getNumberOfColumns();

        final Network net = map.getNetwork();
        final LocationFinder locator = new LocationFinder(map);

        // Number of samples whose best matching unit is at that location.
        final int[][] hits = new int[rows][cols];
        // Number of those samples that exhibit a topographic error.
        final double[][] errors = new double[rows][cols];

        for (double[] point : data) {
            final Pair<Neuron, Neuron> winners
                = MapUtils.findBestAndSecondBest(point, map, distance);
            final Neuron first = winners.getFirst();

            final LocationFinder.Location where = locator.getLocation(first);
            final int r = where.getRow();
            final int c = where.getColumn();
            hits[r][c]++;

            // An error is counted when the first and second best matching
            // units are not neighbours.
            final boolean adjacent = net.getNeighbours(first).contains(winners.getSecond());
            if (!adjacent) {
                errors[r][c]++;
            }
        }

        if (!relativeCount) {
            return errors;
        }

        for (int r = 0; r < rows; r++) {
            final double[] errorRow = errors[r];
            final int[] hitRow = hits[r];
            for (int c = 0; c < cols; c++) {
                errorRow[c] /= hitRow[c];
            }
        }

        return errors;
    }
}

View File

@ -0,0 +1,203 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.ml.neuralnet.twod.util;
import java.util.Collection;
import org.apache.commons.math4.ml.neuralnet.Neuron;
import org.apache.commons.math4.ml.neuralnet.Network;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
/**
 * <a href="http://en.wikipedia.org/wiki/U-Matrix">U-Matrix</a>
 * visualization of high-dimensional data projection.
 */
public class UnifiedDistanceMatrix implements MapVisualization {
    /** Horizontal directions, ordered by increasing column offset (-1, 0, +1). */
    private static final NeuronSquareMesh2D.HorizontalDirection[] HORIZONTAL = {
        NeuronSquareMesh2D.HorizontalDirection.LEFT,
        NeuronSquareMesh2D.HorizontalDirection.CENTER,
        NeuronSquareMesh2D.HorizontalDirection.RIGHT
    };
    /** Vertical directions, ordered by increasing row offset (-1, 0, +1). */
    private static final NeuronSquareMesh2D.VerticalDirection[] VERTICAL = {
        NeuronSquareMesh2D.VerticalDirection.UP,
        NeuronSquareMesh2D.VerticalDirection.CENTER,
        NeuronSquareMesh2D.VerticalDirection.DOWN
    };
    /** Index of the "CENTER" entry in the direction tables. */
    private static final int CENTER_INDEX = 1;

    /** Whether to show distance between each pair of neighbouring units. */
    private final boolean individualDistances;
    /** Distance. */
    private final DistanceMeasure distance;

    /** Simple constructor.
     *
     * @param individualDistances If {@code true}, the 8 individual
     * inter-units distances will be {@link #computeImage(NeuronSquareMesh2D)
     * computed}.  They will be stored in additional pixels around each of
     * the original units of the 2D-map.  A pixel that lies between several
     * pairs of units (the diagonal pixels) holds the average of the
     * corresponding distances.  The value zero will be stored in the
     * pixel corresponding to the location of a unit of the 2D-map.
     * If {@code false}, only the average distance between a unit and all its
     * neighbours will be computed (and stored in the pixel corresponding to
     * that unit of the 2D-map).  In that case, the number of neighbours taken
     * into account depends on the network's
     * {@link org.apache.commons.math4.ml.neuralnet.SquareNeighbourhood
     * neighbourhood type}.
     * @param distance Distance.
     */
    public UnifiedDistanceMatrix(boolean individualDistances,
                                 DistanceMeasure distance) {
        this.individualDistances = individualDistances;
        this.distance = distance;
    }

    /** {@inheritDoc} */
    @Override
    public double[][] computeImage(NeuronSquareMesh2D map) {
        if (individualDistances) {
            return individualDistances(map);
        } else {
            return averageDistances(map);
        }
    }

    /**
     * Computes the distances between a unit of the map and its
     * neighbours.
     * The image will contain more pixels than the number of neurons
     * in the given {@code map} because each neuron has 8 neighbours:
     * its size is {@code (2 * rows + 1) x (2 * columns + 1)}, the unit
     * at map location {@code (i, j)} being at pixel
     * {@code (2 * i + 1, 2 * j + 1)}.
     * The value zero will be stored in the pixels corresponding to
     * the location of a map unit.
     * <p>
     * A pixel located between two units is visited once from each of
     * them, and a diagonal pixel is shared by two <em>different</em> pairs
     * of units (the two diagonals of the surrounding 2x2 cell).
     * All the distances that map to the same pixel are averaged, so
     * that no pair silently overwrites another one.
     * </p>
     *
     * @param map Map.
     * @return an image representing the individual distances.
     */
    private double[][] individualDistances(NeuronSquareMesh2D map) {
        final int numRows = map.getNumberOfRows();
        final int numCols = map.getNumberOfColumns();
        final int imageRows = 2 * numRows + 1;
        final int imageCols = 2 * numCols + 1;

        // Sum of the distances stored in each pixel.
        final double[][] uMatrix = new double[imageRows][imageCols];
        // Number of distances accumulated in each pixel.
        final int[][] count = new int[imageRows][imageCols];

        for (int i = 0; i < numRows; i++) {
            // Current unit's row index in result image.
            final int iR = 2 * i + 1;

            for (int j = 0; j < numCols; j++) {
                // Current unit's column index in result image.
                final int jR = 2 * j + 1;

                final double[] current = map.getNeuron(i, j).getFeatures();

                // Visit the 8 surrounding locations (top-left to bottom-right).
                for (int v = 0; v < VERTICAL.length; v++) {
                    for (int h = 0; h < HORIZONTAL.length; h++) {
                        if (v == CENTER_INDEX && h == CENTER_INDEX) {
                            // Current unit itself: pixel stays at zero.
                            continue;
                        }

                        final Neuron neighbour = map.getNeuron(i, j,
                                                               HORIZONTAL[h],
                                                               VERTICAL[v]);
                        if (neighbour != null) {
                            final int r = iR + v - CENTER_INDEX;
                            final int c = jR + h - CENTER_INDEX;
                            uMatrix[r][c] += distance.compute(current,
                                                              neighbour.getFeatures());
                            ++count[r][c];
                        }
                    }
                }
            }
        }

        // Turn the accumulated sums into averages.
        for (int r = 0; r < imageRows; r++) {
            for (int c = 0; c < imageCols; c++) {
                if (count[r][c] > 1) {
                    uMatrix[r][c] /= count[r][c];
                }
            }
        }

        return uMatrix;
    }

    /**
     * Computes the distances between a unit of the map and its neighbours.
     * Each pixel holds the mean of the distances between the corresponding
     * unit and the neighbours reported by the map's network; the division
     * is not guarded, so a unit without any neighbour yields {@code NaN}.
     *
     * @param map Map.
     * @return an image representing the average distances.
     */
    private double[][] averageDistances(NeuronSquareMesh2D map) {
        final int numRows = map.getNumberOfRows();
        final int numCols = map.getNumberOfColumns();
        final double[][] uMatrix = new double[numRows][numCols];

        final Network net = map.getNetwork();

        for (int i = 0; i < numRows; i++) {
            for (int j = 0; j < numCols; j++) {
                final Neuron neuron = map.getNeuron(i, j);
                final Collection<Neuron> neighbours = net.getNeighbours(neuron);
                final double[] features = neuron.getFeatures();

                double d = 0;
                int count = 0;
                for (Neuron n : neighbours) {
                    ++count;
                    d += distance.compute(features, n.getFeatures());
                }

                uMatrix[i][j] = d / count;
            }
        }

        return uMatrix;
    }
}