diff --git a/Claudio_Maggioni_1/Claudio_Maggioni_1.pdf b/Claudio_Maggioni_1/Claudio_Maggioni_1.pdf
index 2d2655c..be6b249 100644
Binary files a/Claudio_Maggioni_1/Claudio_Maggioni_1.pdf and b/Claudio_Maggioni_1/Claudio_Maggioni_1.pdf differ
diff --git a/Claudio_Maggioni_1/Claudio_Maggioni_1.tex b/Claudio_Maggioni_1/Claudio_Maggioni_1.tex
index fa0cc82..09ad756 100755
--- a/Claudio_Maggioni_1/Claudio_Maggioni_1.tex
+++ b/Claudio_Maggioni_1/Claudio_Maggioni_1.tex
@@ -73,7 +73,7 @@ It is a necessary condition for a minimizer $x^*$ of $J$ that:
 
 \subsection{Second order necessary condition}
 
-It is a necessary condition for a minimizer $x^*$ of $J$ that:
+It is a necessary condition for a minimizer $x^*$ of $J$ that the first order necessary condition holds and:
 
 \[\nabla^2 J(x^*) \geq 0 \Leftrightarrow A \text{ is positive semi-definite}\]
 
@@ -87,7 +87,7 @@ It is a sufficient condition for $x^*$ to be a minimizer of $J$ that the first n
 
 Not in general. If, for example, we take $A$ and $b$ to be all zeros, then $J(x) = 0$ for all $x \in \mathbb{R}^n$ and thus $J$ has an infinite number of minimizers.
 
-However, for if $A$ would be guaranteed to have full rank, the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank).
+However, if $A$ is guaranteed to be s.p.d., the minimizer is unique because the first order necessary condition holds for exactly one value $x^*$: the linear system $Ax^* = b$ has one and only one solution (since $A$ is full rank), and that solution is the minimizer because the Hessian is positive definite, making $J$ strictly convex.
 
 \section{Exercise 3}
 
@@ -150,7 +150,7 @@ Considering $p$, our search direction, as the negative of the gradient (as dicta
 To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$.
 
 \[l'(\alpha) = 2 \cdot \langle A (x + \alpha p), p \rangle = 2 \cdot \left( \langle Ax, p \rangle + \alpha \langle Ap, p \rangle \right)\]
-\[l'(\alpha) = 0 \Leftrightarrow \alpha = \frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
+\[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
 
 Since $A$ is s.p.d., the second derivative of $l(\alpha)$ is always positive; hence the stationary point found above is a minimizer of $l(\alpha)$, and the definition of $\alpha$ given above yields the optimal search step for the gradient method.
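The sign fix in the last hunk above can be checked numerically. Below is a minimal MATLAB sketch (illustrative only, not part of the patch; the matrix A, the iterate x, and the probe offset 0.01 are arbitrary choices) that evaluates l(alpha) = (x + alpha*p)' * A * (x + alpha*p) at the corrected step and at nearby steps, confirming that the corrected alpha is a minimizer along the steepest descent direction:

% Sanity check for the corrected step length (illustrative sketch,
% not part of the patch; A and x are arbitrary s.p.d./starting choices)
A = [1 0; 0 5];                          % s.p.d. test matrix
x = [10; 10];                            % current iterate
p = -2 * A * x;                          % steepest descent direction for f(x) = x'*A*x
alpha = -dot(A * x, p) / dot(A * p, p);  % corrected optimal step
l = @(a) (x + a * p)' * A * (x + a * p); % objective along the search ray
% l(alpha) should not exceed l at nearby step sizes:
fprintf("l(a)=%g  l(a-0.01)=%g  l(a+0.01)=%g\n", ...
    l(alpha), l(alpha - 0.01), l(alpha + 0.01));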
diff --git a/Claudio_Maggioni_1/ex3.asv b/Claudio_Maggioni_1/ex3.asv
new file mode 100644
index 0000000..581ac3f
--- /dev/null
+++ b/Claudio_Maggioni_1/ex3.asv
@@ -0,0 +1,168 @@
+%% Homework 1 - Optimization Methods
+% Author: Claudio Maggioni
+%
+% Sources:
+% - https://www.youtube.com/watch?v=91RZYO1cv_o
+
+clear
+clc
+close all
+format short
+
+colw = 5;
+colh = 2;
+
+%% Exercise 3.1
+
+% f(x1, x2) = x1^2 + u * x2^2;
+
+% 1/2 * [x1 x2] [2 0] [x1] + [0][x1]
+%       [0 2u] [x2] + [0][x2]
+
+% A = [1 0; 0 u]; b = [0; 0]
+
+%% Exercise 3.2
+xaxis = -10:0.1:10;
+yaxis = xaxis;
+Zn = zeros(size(xaxis, 2), size(yaxis, 2));
+Zs = {Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn};
+
+for u = 1:10
+    A = [1 0; 0 u];
+
+    for i = 1:size(xaxis, 2)
+        for j = 1:size(yaxis, 2)
+            vec = [xaxis(i); yaxis(j)];
+            Zs{u}(i, j) = vec' * A * vec;
+        end
+    end
+end
+
+for u = 1:10
+    subplot(colh, colw, u);
+    h = surf(xaxis, yaxis, Zs{u});
+    set(h,'LineStyle','none');
+    title(sprintf("u=%d", u));
+end
+sgtitle("Surf plots");
+
+% comment these lines on submission
+% addpath /home/claudio/git/matlab2tikz/src
+% matlab2tikz('showInfo', false, './surf.tex')
+
+figure
+
+% max iterations
+c = 100;
+
+yi = zeros(30, c);
+ni = zeros(30, c);
+its = zeros(30, 1);
+
+for u = 1:10
+    subplot(colh, colw, u);
+    contour(xaxis, yaxis, Zs{u}, 10);
+    title(sprintf("u=%d", u));
+
+
+%% Exercise 3.3
+
+    A = [2 0; 0 2*u];
+    b = [0; 0];
+    xs = [[0; 10] [10; 0] [10; 10]];
+
+    syms sx sy
+    f = 1/2 * [sx sy] * A * [sx; sy];
+
+    g = gradient(f, [sx; sy]);
+
+    hold on
+    j = 1;
+    for x0 = xs
+        ri = u * 3 - 3 + j;
+        x = x0;
+        i = 1;
+
+        xi = zeros(2, c);
+        xi(:, 1) = x0;
+
+        yi(ri, 1) = subs(f, [sx sy], x0');
+
+        while i <= c
+            p = -1 * double(subs(g, [sx sy], x'));
+
+            ni(ri, i) = log10(norm(p, 2));
+            if norm(p, 2) == 0 || ni(ri, i) <= -8
+                break
+            end
+
+            alpha = dot(b - A * x, p) / dot(A * p, p);
+
+            x = x + alpha * p;
+            i = i + 1;
+
+            xi(:, i) = x;
+            yi(ri, i) = subs(f, [sx sy], x');
+        end
+
+        xi = xi(:, 1:i);
+
+        plot(xi(1, :), xi(2, :), '-');
+
+        fprintf("u=%2d x0=[%2d,%2d] it=%2d x=[%d,%d]\n", u, ...
+            x0(1), x0(2), i, x(1), x(2));
+
+        its(ri) = i;
+
+        j = j + 1;
+    end
+    hold off
+end
+sgtitle("Contour plots and iteration steps");
+
+% comment these lines on submission
+% addpath /home/claudio/git/matlab2tikz/src
+% matlab2tikz('showInfo', false, './contour.tex')
+
+figure
+
+for u = 1:10
+    subplot(colh, colw, u);
+    title(sprintf("u=%d", u));
+    hold on
+    for j = 1:3
+        ri = u * 3 - 3 + j;
+        vec = yi(ri, :);
+        vec = vec(1:its(ri));
+
+        plot(1:its(ri), vec);
+    end
+    hold off
+end
+sgtitle("Iterations over values of objective function");
+
+% comment these lines on submission
+% addpath /home/claudio/git/matlab2tikz/src
+% matlab2tikz('showInfo', false, './yseries.tex')
+
+figure
+
+for u = 1:10
+    subplot(colh, colw, u);
+    hold on
+    for j = 1:3
+        ri = u * 3 - 3 + j;
+        vec = ni(ri, :);
+        vec = vec(1:its(ri));
+
+        plot(1:its(ri), vec, '-o');
+    end
+    hold off
+    title(sprintf("u=%d", u));
+end
+sgtitle("Iterations over log10 of gradient norms");
+
+% comment these lines on submission
+% addpath /home/claudio/git/matlab2tikz/src
+% matlab2tikz('showInfo', false, './norms.tex')
+
diff --git a/Claudio_Maggioni_1/ex3.m b/Claudio_Maggioni_1/ex3.m
index 9f9312a..75e4ccc 100644
--- a/Claudio_Maggioni_1/ex3.m
+++ b/Claudio_Maggioni_1/ex3.m
@@ -67,12 +67,11 @@ for u = 1:10
 
 %% Exercise 3.3
 
-    A = [2 0; 0 2*u];
-    b = [0; 0];
+    A = [1 0; 0 1*u];
     xs = [[0; 10] [10; 0] [10; 10]];
 
     syms sx sy
-    f = 1/2 * [sx sy] * A * [sx; sy];
+    f = [sx sy] * A * [sx; sy];
 
     g = gradient(f, [sx; sy]);
 
@@ -96,7 +95,7 @@ for u = 1:10
                 break
             end
 
-            alpha = dot(b - A * x, p) / dot(A * p, p);
+            alpha = dot(-A * x, p) / dot(A * p, p);
 
             x = x + alpha * p;
             i = i + 1;
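With b dropped and f = x' * A * x (no 1/2 factor), the patched step alpha = dot(-A * x, p) / dot(A * p, p) agrees with the alpha = -<Ax, p> / <Ap, p> derived in the report. Below is a minimal standalone sketch of the patched iteration (illustrative only, not part of the patch; it substitutes the closed-form gradient 2*A*x for the symbolic one, and u, the starting point, and the iteration cap are arbitrary choices):

% Standalone check of the patched steepest-descent loop (illustrative;
% mirrors ex3.m but avoids the Symbolic Math Toolbox, since for
% f(x) = x'*A*x the gradient is simply 2*A*x)
u = 10;
A = [1 0; 0 u];
x = [10; 10];
for i = 1:100
    p = -2 * A * x;                          % steepest descent direction
    if norm(p, 2) == 0 || log10(norm(p, 2)) <= -8
        break                                % same stopping rule as ex3.m
    end
    alpha = dot(-A * x, p) / dot(A * p, p);  % exact line search step
    x = x + alpha * p;
end
fprintf("u=%2d it=%2d x=[%g,%g]\n", u, i, x(1), x(2));

Since A = diag(1, u) has condition number u here, the iterates should converge quickly for small u and noticeably more slowly (with the familiar zigzag pattern) as u grows, which is the behaviour the contour and gradient-norm plots in ex3.m are meant to exhibit.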