diff --git a/hw1/ex3.m b/hw1/ex3.m index 1896868..c6f71f8 100644 --- a/hw1/ex3.m +++ b/hw1/ex3.m @@ -9,8 +9,8 @@ clc close all format short -colw = 3; -colh = 4; +colw = 5; +colh = 2; %% Exercise 3.1 @@ -42,7 +42,7 @@ for u = 1:10 subplot(colh, colw, u); h = surf(xaxis, yaxis, Zs{u}); set(h,'LineStyle','none'); - title(sprintf("\nu=%d", u)); + title(sprintf("u=%d", u)); end sgtitle("Surf plots"); @@ -52,14 +52,17 @@ matlab2tikz('showInfo', false, './surf.tex') figure -yi = zeros(30, 25); -ni = zeros(30, 25); +% max iterations +c = 100; + +yi = zeros(30, c); +ni = zeros(30, c); its = zeros(30, 1); for u = 1:10 subplot(colh, colw, u); contour(xaxis, yaxis, Zs{u}, 10); - title(sprintf("\nu=%d", u)); + title(sprintf("u=%d", u)); %% Exercise 3.3 @@ -80,12 +83,12 @@ for u = 1:10 x = x0; i = 1; - xi = zeros(2, 25); + xi = zeros(2, c); xi(:, 1) = x0; yi(ri, 1) = subs(f, [sx sy], x0'); - while true + while i <= c p = -1 * double(subs(g, [sx sy], x')); ni(ri, i) = log10(norm(p, 2)); @@ -125,7 +128,7 @@ figure for u = 1:10 subplot(colh, colw, u); - title(sprintf("\nu=%d", u)); + title(sprintf("u=%d", u)); hold on for j = 1:3 ri = u * 3 - 3 + j; @@ -152,10 +155,10 @@ for u = 1:10 vec = ni(ri, :); vec = vec(1:its(ri)); - plot(1:its(ri), vec); + plot(1:its(ri), vec, '-o'); end hold off - title(sprintf("\nu=%d", u)); + title(sprintf("u=%d", u)); end sgtitle("Iterations over log10 of gradient norms"); diff --git a/hw1/main.pdf b/hw1/main.pdf index 15ef9d1..2bee882 100644 Binary files a/hw1/main.pdf and b/hw1/main.pdf differ diff --git a/hw1/main.tex b/hw1/main.tex index a059a4d..c4e78e7 100644 --- a/hw1/main.tex +++ b/hw1/main.tex @@ -101,19 +101,59 @@ where: \[v = \begin{bmatrix}x\\y\end{bmatrix}\] -\subsection{Matlab implementation with \texttt{surf} and \texttt{contour}} +\subsection{Finding the optimal step length $\alpha$} + +Considering $p$, our search direction, as the negative of the gradient (as dictated by the gradient method), we can rewrite the problem of finding an 
optimal step size $\alpha$ as the problem of minimizing the objective function along the line through $x$ with direction $p$. This can be written as minimizing a function $l(\alpha)$, where: + +\[l(\alpha) = \frac12 \langle A(x + \alpha p), x + \alpha p\rangle\] + +To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$. + +\[l'(\alpha) = 2 \cdot \frac12 \langle A (x + \alpha p), p \rangle = \langle Ax, p \rangle + \alpha \langle Ap, p \rangle\] +\[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\] + +Since $A$ is s.p.d. by definition, the second derivative of $l(\alpha)$, namely $\langle Ap, p \rangle$, is always positive for $p \neq 0$. Hence the stationary point found above is a minimizer of $l(\alpha)$, and thus the expression for $\alpha$ given above is the optimal step size for the gradient method. + +\subsection{Matlab implementation with \texttt{surf} and \texttt{contour} plots} The graphs generated by MATLAB are shown below: +\begin{figure}[h] \resizebox{\textwidth}{!}{\input{surf.tex}} +\caption{Surf plots for different values of $\mu$} +\end{figure} +\begin{figure}[h] \resizebox{\textwidth}{!}{\input{contour.tex}} +\caption{Contour plots and iteration steps. Red has $x_0 = \begin{bmatrix}10&0\end{bmatrix}^T$, + yellow has $x_0 = \begin{bmatrix}10&10\end{bmatrix}^T$, and blue has $x_0 = \begin{bmatrix}0&10\end{bmatrix}^T$} +\end{figure} +\begin{figure}[h] \resizebox{\textwidth}{!}{\input{yseries.tex}} +\caption{Iterations over values of the objective function. Red has $x_0 = \begin{bmatrix}10&0\end{bmatrix}^T$, + yellow has $x_0 = \begin{bmatrix}10&10\end{bmatrix}^T$, and blue has $x_0 = \begin{bmatrix}0&10\end{bmatrix}^T.$} +\end{figure} +\begin{figure}[h] \resizebox{\textwidth}{!}{\input{norms.tex}} +\caption{Iterations over base 10 logarithm of gradient norms. 
Note that for $\mu=1$ the search immediately converges to the + exact minimizer no matter the value of $x_0$, so no gradient norm other than the very first one is recorded. Again, + red has $x_0 = \begin{bmatrix}10&0\end{bmatrix}^T$, + yellow has $x_0 = \begin{bmatrix}10&10\end{bmatrix}^T$, and blue has $x_0 = \begin{bmatrix}0&10\end{bmatrix}^T.$} +\end{figure} + +Isolines get stretched along the y axis as $\mu$ increases. For $\mu \neq 1$, points far away from the axes are a +problem, since the iterates produced by the gradient method will zig-zag +towards the minimizer, reaching it slowly. + +Additionally, from the \texttt{surf} plots, we can see that the behaviour of isolines is justified by a ``stretching'' of sorts +of the function that causes it to be steeper along the y axis as $\mu$ increases. + +What has been said before about the convergence of the gradient method is also shown in the last two sets of plots. +From the objective function plot we can see that iterations starting from $\begin{bmatrix}10&10\end{bmatrix}^T$ (depicted in yellow) take the highest number of iterations to reach the minimizer (or an acceptable approximation of it). The zig-zag behaviour described before can also be observed in the contour plots, showing the iteration steps taken for each $\mu$ and starting from each $x_0$. + +Finally, in the gradient norm plots a phenomenon that creates increasingly flat plateaus as $\mu$ increases can be observed. -Isolines get stretched along the y axis as $\mu$ increases. For a large $\mu$, points well far away from the axes could be a -problem since picking search directions and steps using a naive gradient based method iterations will zig-zag to the minimizer reaching it slowly. 
\end{document} diff --git a/hw1/norms.tex b/hw1/norms.tex index e4479ac..24f2845 100644 --- a/hw1/norms.tex +++ b/hw1/norms.tex @@ -22,17 +22,17 @@ title={u=1}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 1.45154499349597\\ 2 -inf\\ @@ -55,17 +55,17 @@ title={u=2}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 1.60205999132796\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 1.65051499783199\\ 2 0.997302484056647\\ @@ -104,17 +104,17 @@ title={u=3}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 1.77815125038364\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 1.80102999566398\\ 2 1.13202321470541\\ @@ -157,17 +157,17 @@ title={u=4}, axis x line*=bottom, 
axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 1.90308998699194\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 1.91625445635312\\ 2 1.18252234575789\\ @@ -210,17 +210,17 @@ title={u=5}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.00851666964939\\ 2 1.20917612019581\\ @@ -263,17 +263,17 @@ title={u=6}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2.07918124604762\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.08513085769748\\ 2 1.22579237856861\\ @@ -316,17 +316,17 @@ title={u=7}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2.14612803567824\\ 2 
-inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.15051499783199\\ 2 1.23720584565836\\ @@ -367,17 +367,17 @@ title={u=8}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2.20411998265593\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.20748667398541\\ 2 1.24555733587979\\ @@ -418,17 +418,17 @@ title={u=9}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2.25527250510331\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.25793692185584\\ 2 1.25194655816665\\ @@ -469,17 +469,17 @@ title={u=10}, axis x line*=bottom, axis y line*=left ] -\addplot [color=mycolor1, forget plot] +\addplot [color=mycolor1, mark=o, mark options={solid, mycolor1}, forget plot] table[row sep=crcr]{% 1 2.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor2, forget plot] +\addplot [color=mycolor2, mark=o, mark options={solid, mycolor2}, forget plot] table[row sep=crcr]{% 1 
1.30102999566398\\ 2 -inf\\ }; -\addplot [color=mycolor3, forget plot] +\addplot [color=mycolor3, mark=o, mark options={solid, mycolor3}, forget plot] table[row sep=crcr]{% 1 2.3031906825553\\ 2 1.25699911451531\\