\documentclass[unicode,11pt,a4paper,oneside,numbers=endperiod,openany]{scrartcl} \usepackage{graphicx} \input{assignment.sty} \usepackage{pgfplots} \pgfplotsset{compat=newest} \usetikzlibrary{plotmarks} \usetikzlibrary{arrows.meta} \usepgfplotslibrary{patchplots} \usepackage{grffile} \usepackage{amsmath} \usepgfplotslibrary{external} \tikzexternalize \begin{document} \setassignment \setduedate{Wednesday, 18 November 2020, 11:55 PM} \serieheader{Numerical Computing}{2020}{Student: Claudio Maggioni}{Discussed with: --}{Solution for Project 4}{} \newline \assignmentpolicy \section{Spectral clustering of non-convex sets [60 points]:} Plots for the \textit{Two spirals}, \textit{Cluster in cluster}, \textit{Crescent moon}, \textit{Full moon} clustering for $K=2$ and for \textit{Corners}, \textit{Outlier} for $K=4$ can be found in figures~\ref{fig:setsa},~\ref{fig:setsb},~\ref{fig:setsc},~\ref{fig:setsd},~\ref{fig:setse}, and ~\ref{fig:setsf}. All the plots are reproducible by simply running \texttt{ClusterPoints.m} once. \subsection{Observation on the \textit{Spiral} set of points} It is possible to distinguish two distinct cluster in the \textit{Spiral} set: if we consider this set of points as two intertwined non-intersecting spiral shaped curves, then each of the spiral can be considered as a cluster. However, it is possible that a naive clustering approach might not recognise these two clusters: since the spirals are intertwined and the points they are rotating on are close, averaging the points on each spiral leads to very close centroids and thus an algorithm heavily based on coordinate averaging (like k-means clustering) might have a hard time in identifying the spirals. \subsection{Choice of $\sigma$ parameter for the Gaussian similarity function} According to the recommendations and rules of thumb on $\epsilon$ neighboorhood graph based spectral clustering, the $\sigma$ parameter, as a rule of thumb, should be chosen in the order of $\log(n)$. However, choosing $\sigma = \log(n)$ produces ill-conditioned i.e. (singular) Laplacian matrices for some graphs, which make spectral clustering inaccurate or impossible. Therefore, I have chosen $\sigma = 2 \log(n)$. As the choice of this parameter is not governed by any strict law and this choice follows the rule of thumb and produces results that do not raise suspicion on incorrectness, I sticked to this choice. \section{Spectral clustering of real-world graphs [40 points]:} \subsection{Plotting and commenting spectral and k-means clustering for several example graphs} Plots of spectral and K-means clustering for graphs \textit{Airfoil}, \textit{Barth}, \textit{Grid2}, and \textit{3elt} can be found respectively in figure~\ref{fig:air},~\ref{fig:bar},~\ref{fig:gri} and~\ref{fig:elt}. These graphs are reproducible by running \texttt{ClusterGraphs.m} once. Overall, in all graphs spectral clustering seems to favour a more even distribution of vertices along the various clusters, while K-means generates clusters that cover similar areas. The extreme example of this is the \textit{3elt} graph, where spectral and k-means clustering with wild imbalances respectively in cluster area and in cluster vertex count. In the \textit{Airfoil} graph it is unclear how the graph should be partitioned: both clusterings seem artificial and arbitrary. Again the comparison of cluster areas and cluster node counts follows what said above. For the \textit{Grid2} graph, spectral clustering seems to form a more natural cluster set by cutting somewhat radially along the center of the graph's ``hole''. In contrast, k-means clustering performs more artificial cuts along the y axis, creating clusters that resemble even slices of a cake. Both \textit{Barth} and \textit{3elt} clusterings differ drammatically between spectral and k-means clustering and, albeit following the general observation stated above as well, both are not natural or obvious clusterings to human judgement. \subsection{Vertex count for spectral and k-means clustering} The table for the node counts of each cluster produced by spectral and k-means clustering of graphs \textit{Airfoil}, \textit{Grid2}, \textit{Barth}, and \textit{3elt} can be found in figure~\ref{fig:tab}. The table and histograms found under the figures in the last section can be reproduced by running \texttt{ClusterGraphs.m} once. As stated before, we observe that node counts for clusters generated by spectral clustering are more balanced with each other than the ones produced by K-means. \begin{figure} \centering \scalebox{.75}{\input{Two spirals.tex}} \caption{Spectral and k-means clustering graphs for \textit{Two Spirals}}\label{fig:setsa} \end{figure} \begin{figure} \centering \scalebox{.75}{\input{Cluster in cluster.tex}} \caption{Spectral and k-means clustering graphs for \textit{Cluster in cluster}}\label{fig:setsb} \end{figure} \begin{figure} \centering \scalebox{.75}{\input{Half crescent.tex}} \caption{Spectral and k-means clustering graphs for \textit{Half crescent}}\label{fig:setsc} \end{figure} \begin{figure} \centering \scalebox{.75}{\input{Full crescent.tex}} \caption{Spectral and k-means clustering graphs for \textit{Full crescent}}\label{fig:setsd} \end{figure} \begin{figure} \centering \scalebox{.75}{\input{Corners.tex}} \caption{Spectral and k-means clustering graphs for \textit{Corners}}\label{fig:setse} \end{figure} \begin{figure} \centering \scalebox{.75}{\input{Outlier.tex}} \caption{Spectral and k-means clustering graphs for \textit{Outlier}}\label{fig:setsf} \end{figure} \begin{figure} \centering\input{airfoil1_clu.tex} \caption{Graphs for \textit{Airfoil1}}\label{fig:air} \end{figure} \begin{figure} \centering\input{barth_clu.tex} \caption{Graphs for \textit{Barth}}\label{fig:bar} \end{figure} \begin{figure} \centering\input{grid2_clu.tex} \caption{Graphs for \textit{Grid2}\label{fig:gri}} \end{figure} \begin{figure} \centering\input{3elt_clu.tex} \caption{Graphs for \textit{3elt}}\label{fig:elt} \end{figure} \begin{figure} \centering \begin{tabular}{ccccccccc} Graph & \multicolumn{4}{c}{Spectral} & \multicolumn{4}{c}{K-means} \\ {} & 1 & 2 & 3 & 4 & 1 & 2 & 3 & 4 \\\hline airfoil1 & 1150& 1082 &1050& 971 & 1871& 347 & 738 &1297\\ barth & 1601 &1490 &1405 &2195 & 70 &3526 & 70 &3025\\ grid2 & 379& 827 &1305 & 785 & 1271 & 604 & 238 &1183\\ 3elt & 965 &874 &1794 &1087 & 13 &1714 & 37 &2956\\ \end{tabular} \caption{Spectral and K-means clustering node counts for node counts}\label{fig:tab} \end{figure} \end{document}