Merge branch 'master' of tea.maggioni.xyz:maggicl/NC

This commit is contained in:
Claudio Maggioni 2020-11-25 15:21:13 +01:00
commit 0a99f022c2
22 changed files with 2636628 additions and 514237 deletions

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(1.011in,0.642in)}, at={(0.758in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -17,32 +17,32 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 1088\\ 0.5 965\\
1.5 965\\ 1.5 1794\\
2.5 869\\ 2.5 874\\
3.5 1798\\ 3.5 1087\\
4.5 1798\\ 4.5 1087\\
}; };
\end{axis} \end{axis}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(4.436in,0.642in)}, at={(3.327in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
ymin=0, ymin=0,
ymax=3500, ymax=3000,
axis background/.style={fill=white} axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 31\\ 0.5 20\\
1.5 1560\\ 1.5 2905\\
2.5 3098\\ 2.5 1768\\
3.5 31\\ 3.5 27\\
4.5 31\\ 4.5 27\\
}; };
\end{axis} \end{axis}
\end{tikzpicture}% \end{tikzpicture}%

366464
mp4/Cluster in cluster.tex Normal file

File diff suppressed because it is too large Load diff

308842
mp4/Corn.tex Normal file

File diff suppressed because it is too large Load diff

237686
mp4/Corners.tex Normal file

File diff suppressed because it is too large Load diff

199244
mp4/Full crescent.tex Normal file

File diff suppressed because it is too large Load diff

282200
mp4/Half crescent.tex Normal file

File diff suppressed because it is too large Load diff

51132
mp4/Outlier.tex Normal file

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -1,58 +0,0 @@
\documentclass[unicode,11pt,a4paper,oneside,numbers=endperiod,openany]{scrartcl}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{amsmath}
\input{assignment.sty}
\usepackage{pgfplots}
\pgfplotsset{compat=newest}
\usetikzlibrary{plotmarks}
\usetikzlibrary{arrows.meta}
\usepgfplotslibrary{patchplots}
\usepackage{grffile}
\usepackage{amsmath}
\usepackage{subcaption}
\usepgfplotslibrary{external}
\tikzexternalize
\begin{document}
\setassignment
\setduedate{Wednesday, 18 November 2020, 11:55 PM}
\serieheader{Numerical Computing}{2020}{Student: FULL NAME}{Discussed with: FULL NAME}{Solution for Project 4}{}
\newline
\assignmentpolicy
\begin{enumerate}
\item \textbf{Spectral clustering of non-convex sets [60 points]:}
\item \textbf{Spectral clustering of real-world graphs [40 points]:}
\end{enumerate}
\begin{figure}
\centering\input{airfoil1_clu.tex}
\caption{Graphs for \textit{Airfoil1}}
\end{figure}
\begin{figure}
\centering\input{barth_clu.tex}
\caption{Graphs for \textit{Barth}}
\end{figure}
\begin{figure}
\centering\input{grid2_clu.tex}
\caption{Graphs for \textit{Grid2}}
\end{figure}
\begin{figure}
\centering\input{3elt_clu.tex}
\caption{Graphs for \textit{3elt}}
\end{figure}
\begin{figure}
\end{figure}
\end{document}

View file

@ -13,7 +13,7 @@ addpath ../datasets/Meshes
% Coords used in this script % Coords used in this script
[Pts_spirals,Pts_clusterin,Pts_corn,Pts_halfk,Pts_fullmoon,Pts_out] = getPoints(); [Pts_spirals,Pts_clusterin,Pts_corn,Pts_halfk,Pts_fullmoon,Pts_out] = getPoints();
TITLES = ["Two Spirals", "Cluster in", "Corn", "Half crescent", "Full crescent", "Outlier"]; TITLES = ["Two Spirals", "Cluster in cluster", "Corners", "Half crescent", "Full crescent", "Outlier"];
RUNS = {Pts_spirals, Pts_clusterin, Pts_corn, Pts_halfk, Pts_fullmoon, Pts_out}; RUNS = {Pts_spirals, Pts_clusterin, Pts_corn, Pts_halfk, Pts_fullmoon, Pts_out};
KS = {2, 2, 4, 2, 2, 4}; KS = {2, 2, 4, 2, 2, 4};
@ -30,7 +30,7 @@ for i = 1:6
n = size(Pts, 1); n = size(Pts, 1);
% Create Gaussian similarity function % Create Gaussian similarity function
[S] = similarityfunc(Pts(:,1:2), 10 * log(n)); [S] = similarityfunc(Pts(:,1:2), 2 * log(n));
%% 1b) Find the minimal spanning tree of the full graph. Use the information %% 1b) Find the minimal spanning tree of the full graph. Use the information
% to determine a valid value for epsilon % to determine a valid value for epsilon
@ -65,5 +65,7 @@ for i = 1:6
subplot(1,2,2) subplot(1,2,2)
gplotmap(W,Pts,x_kmeans) gplotmap(W,Pts,x_kmeans)
title(strcat(TITLES(i), ': K-means clusters')) title(strcat(TITLES(i), ': K-means clusters'))
matlab2tikz('showInfo', false, strcat('../../', TITLES{i}, '.tex'));
end end

View file

@ -0,0 +1,45 @@
% Sweep the multiplier applied to log(n) in the Gaussian similarity
% function and record, for every point set, the reciprocal condition
% estimate (rcond) of the resulting Laplacian matrix.
clear variables;
close all;
warning('OFF');

addpath('../datasets');
addpath('../datasets/Meshes');

[Pts_spirals, Pts_clusterin, Pts_corn, Pts_halfk, Pts_fullmoon, Pts_out] = getPoints();
% NOTE(review): presumably getPoints opens figures as a side effect, hence
% the second close all -- confirm against getPoints.
close all;

TITLES = ["Two Spirals", "Cluster in cluster", "Corners", "Half crescent", "Full crescent", "Outlier"];
RUNS = {Pts_spirals, Pts_clusterin, Pts_corn, Pts_halfk, Pts_fullmoon, Pts_out};
KS = {2, 2, 4, 2, 2, 4};

% One row per sweep step (only rows 20..70 are filled), one column per set.
B = zeros(100, 6);

for row = 20:70
    % Exponent runs from 0 to 5 in steps of 0.1, so the multiplier
    % covers 10^0 .. 10^5.
    p = (row - 20) / 10;
    multiplier = 10^p;

    for col = 1:6
        % Point set and cluster count for this run (K is not used below)
        Pts = RUNS{col};
        K = KS{col};
        n = size(Pts, 1);

        % Gaussian similarity matrix with the swept parameter
        S = similarityfunc(Pts(:,1:2), multiplier * log(n));

        % Largest positive entry of the minimal spanning tree result is
        % taken as epsilon
        H = minSpanTree(S);
        positiveEntries = H(H > 0);
        epsilon = max(positiveEntries, [], 'all');

        % Epsilon similarity graph and the masked adjacency matrix
        G = epsilonSimGraph(epsilon, Pts);
        W = S .* G;

        % Laplacian matrix and its reciprocal condition estimate
        [L, Diag] = CreateLapl(W);
        B(row, col) = rcond(L);

        fprintf("%s - %g: %g\n", TITLES(col), multiplier, B(row, col));
    end
end
end

676732
mp4/Two Spirals.tex Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(1.011in,0.642in)}, at={(0.758in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -17,18 +17,18 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 971\\ 0.5 1055\\
1.5 1050\\ 1.5 973\\
2.5 1150\\ 2.5 1084\\
3.5 1082\\ 3.5 1141\\
4.5 1082\\ 4.5 1141\\
}; };
\end{axis} \end{axis}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(4.436in,0.642in)}, at={(3.327in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -38,11 +38,11 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 344\\ 0.5 407\\
1.5 739\\ 1.5 1860\\
2.5 1869\\ 2.5 705\\
3.5 1301\\ 3.5 1281\\
4.5 1301\\ 4.5 1281\\
}; };
\end{axis} \end{axis}
\end{tikzpicture}% \end{tikzpicture}%

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(1.011in,0.642in)}, at={(0.758in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -17,32 +17,32 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 1588\\ 0.5 1490\\
1.5 1490\\ 1.5 2195\\
2.5 2206\\ 2.5 1405\\
3.5 1407\\ 3.5 1601\\
4.5 1407\\ 4.5 1601\\
}; };
\end{axis} \end{axis}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(4.436in,0.642in)}, at={(3.327in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
ymin=0, ymin=0,
ymax=4000, ymax=3500,
axis background/.style={fill=white} axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 71\\ 0.5 3151\\
1.5 3617\\ 1.5 67\\
2.5 71\\ 2.5 3400\\
3.5 2932\\ 3.5 73\\
4.5 2932\\ 4.5 73\\
}; };
\end{axis} \end{axis}
\end{tikzpicture}% \end{tikzpicture}%

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(1.011in,0.642in)}, at={(0.758in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -17,18 +17,18 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 785\\ 0.5 1285\\
1.5 1305\\ 1.5 851\\
2.5 827\\ 2.5 380\\
3.5 379\\ 3.5 780\\
4.5 379\\ 4.5 780\\
}; };
\end{axis} \end{axis}
\begin{axis}[% \begin{axis}[%
width=2.603in, width=1.952in,
height=4.754in, height=3.566in,
at={(4.436in,0.642in)}, at={(3.327in,0.481in)},
scale only axis, scale only axis,
xmin=0.3, xmin=0.3,
xmax=4.7, xmax=4.7,
@ -38,11 +38,11 @@ axis background/.style={fill=white}
] ]
\addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {% \addplot[ybar interval, fill=mycolor1, fill opacity=0.6, draw=black, area legend] table[row sep=crcr] {%
x y\\ x y\\
0.5 1271\\ 0.5 238\\
1.5 1183\\ 1.5 604\\
2.5 604\\ 2.5 1271\\
3.5 238\\ 3.5 1183\\
4.5 238\\ 4.5 1183\\
}; };
\end{axis} \end{axis}
\end{tikzpicture}% \end{tikzpicture}%

View file

@ -0,0 +1,134 @@
\documentclass[unicode,11pt,a4paper,oneside,numbers=endperiod,openany]{scrartcl}
\usepackage{graphicx}
\input{assignment.sty}
\usepackage{pgfplots}
\pgfplotsset{compat=newest}
\usetikzlibrary{plotmarks}
\usetikzlibrary{arrows.meta}
\usepgfplotslibrary{patchplots}
\usepackage{grffile}
\usepackage{amsmath}
\usepgfplotslibrary{external}
\tikzexternalize
\begin{document}
\setassignment
\setduedate{Wednesday, 18 November 2020, 11:55 PM}
\serieheader{Numerical Computing}{2020}{Student: Claudio Maggioni}{Discussed with: --}{Solution for Project 4}{}
\newline
\assignmentpolicy
\section{Spectral clustering of non-convex sets [60 points]:}
Plots for the \textit{Two Spirals}, \textit{Cluster in cluster}, \textit{Half crescent}, and \textit{Full crescent} clusterings for $K=2$, and for \textit{Corners} and \textit{Outlier} for $K=4$, can be found in figures~\ref{fig:setsa},~\ref{fig:setsb},~\ref{fig:setsc},~\ref{fig:setsd},~\ref{fig:setse}, and~\ref{fig:setsf}. All the plots are reproducible by simply running \texttt{ClusterPoints.m} once.
\subsection{Observation on the \textit{Spiral} set of points}
It is possible to distinguish two distinct clusters in the \textit{Spiral} set: if we consider this set of points as two intertwined, non-intersecting spiral-shaped curves, then each of the spirals can be considered as a cluster.
However, it is possible that a naive clustering approach might not recognise these two clusters: since the spirals are intertwined and the points they rotate around are close, averaging the points on each spiral leads to very close centroids, and thus an algorithm heavily based on coordinate averaging (like k-means clustering) might have a hard time identifying the spirals.
\subsection{Choice of $\sigma$ parameter for the Gaussian similarity function}
According to the common recommendations on $\epsilon$-neighborhood graph based spectral clustering, the
$\sigma$ parameter should, as a rule of thumb, be chosen in the order of $\log(n)$. However, choosing
$\sigma = \log(n)$ produces ill-conditioned
(i.e.\ nearly singular) Laplacian matrices for some graphs, which makes spectral clustering inaccurate or impossible. Therefore,
I have chosen $\sigma = 2 \log(n)$. As the choice of this parameter is not governed by any strict law, and this choice follows the rule
of thumb and produces results that do not raise suspicion of incorrectness, I stuck with this choice.
\section{Spectral clustering of real-world graphs [40 points]:}
\subsection{Plotting and commenting spectral and k-means clustering for several example graphs}
Plots of spectral and K-means clustering for graphs \textit{Airfoil}, \textit{Barth}, \textit{Grid2}, and \textit{3elt} can be
found respectively in figures~\ref{fig:air},~\ref{fig:bar},~\ref{fig:gri} and~\ref{fig:elt}. These graphs are reproducible by running \texttt{ClusterGraphs.m} once.
Overall, in all graphs spectral clustering seems to favour a more even distribution of vertices
along the various clusters, while K-means generates clusters that cover similar areas. The extreme
example of this is the \textit{3elt} graph, where spectral and k-means clustering produce wild imbalances
respectively in cluster area and in cluster vertex count.
In the \textit{Airfoil} graph it is unclear how the graph should be partitioned: both clusterings seem artificial and arbitrary.
Again, the comparison of cluster areas and cluster node counts follows what was said above.
For the \textit{Grid2} graph, spectral clustering seems to form a more natural cluster set by cutting somewhat radially
along the center of the graph's ``hole''. In contrast, k-means clustering performs more artificial cuts along the y axis,
creating clusters that resemble even slices of a cake.
Both \textit{Barth} and \textit{3elt} clusterings differ dramatically between spectral and k-means clustering and, albeit
following the general observation stated above as well, neither is a natural or obvious clustering to human judgement.
\subsection{Vertex count for spectral and k-means clustering}
The table for the node counts of each cluster produced by spectral and k-means clustering of graphs \textit{Airfoil}, \textit{Grid2},
\textit{Barth}, and \textit{3elt} can be found in figure~\ref{fig:tab}. The table and histograms found under the figures in the last
section can be reproduced by running \texttt{ClusterGraphs.m} once.
As stated before, we observe that node counts for clusters generated by spectral clustering are more balanced with each other than the
ones produced by K-means.
\begin{figure}
\centering
\scalebox{.75}{\input{Two Spirals.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Two Spirals}}\label{fig:setsa}
\end{figure}
\begin{figure}
\centering
\scalebox{.75}{\input{Cluster in cluster.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Cluster in cluster}}\label{fig:setsb}
\end{figure}
\begin{figure}
\centering
\scalebox{.75}{\input{Half crescent.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Half crescent}}\label{fig:setsc}
\end{figure}
\begin{figure}
\centering
\scalebox{.75}{\input{Full crescent.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Full crescent}}\label{fig:setsd}
\end{figure}
\begin{figure}
\centering
\scalebox{.75}{\input{Corners.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Corners}}\label{fig:setse}
\end{figure}
\begin{figure}
\centering
\scalebox{.75}{\input{Outlier.tex}}
\caption{Spectral and k-means clustering graphs for \textit{Outlier}}\label{fig:setsf}
\end{figure}
\begin{figure}
\centering\input{airfoil1_clu.tex}
\caption{Graphs for \textit{Airfoil1}}\label{fig:air}
\end{figure}
\begin{figure}
\centering\input{barth_clu.tex}
\caption{Graphs for \textit{Barth}}\label{fig:bar}
\end{figure}
\begin{figure}
\centering\input{grid2_clu.tex}
\caption{Graphs for \textit{Grid2}\label{fig:gri}}
\end{figure}
\begin{figure}
\centering\input{3elt_clu.tex}
\caption{Graphs for \textit{3elt}}\label{fig:elt}
\end{figure}
\begin{figure}
\centering
\begin{tabular}{ccccccccc}
Graph & \multicolumn{4}{c}{Spectral} & \multicolumn{4}{c}{K-means} \\
{} & 1 & 2 & 3 & 4 & 1 & 2 & 3 & 4 \\\hline
airfoil1 & 1150& 1082 &1050& 971 & 1871& 347 & 738 &1297\\
barth & 1601 &1490 &1405 &2195 & 70 &3526 & 70 &3025\\
grid2 & 379& 827 &1305 & 785 & 1271 & 604 & 238 &1183\\
3elt & 965 &874 &1794 &1087 & 13 &1714 & 37 &2956\\
\end{tabular}
\caption{Spectral and K-means clustering node counts for each graph}\label{fig:tab}
\end{figure}
\end{document}

8
mp4/submit.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/sh
# Package the Project 4 submission into project_<PID>_Maggioni_Claudio.zip.
# All paths are relative, so run this from the directory that contains the
# report sources and the Project_<PID>_Maggioni_Claudio/ code tree.

PID="4"
dname="Project_${PID}_Maggioni_Claudio"
zname="project_${PID}_Maggioni_Claudio"

# Remove any stale archive; -f avoids an error on the first run, when no
# archive exists yet.
rm -fv "$zname.zip"

# Brace expansion ({pdf,tex}) is not part of POSIX sh, so the report files
# are listed explicitly. The globs stay unquoted so the shell expands them.
zip "$zname.zip" *.tex *.sty usi_inf.pdf "$zname.pdf" "$zname.tex" \
    "$dname"/datasets/*.m "$dname"/src/*.m