hw1: corrections

parent 37d800cf67
commit 39a61ac5d5

4 changed files with 174 additions and 7 deletions
Binary file not shown.
@@ -73,7 +73,7 @@ It is a necessary condition for a minimizer $x^*$ of $J$ that:
 \subsection{Second order necessary condition}
 
-It is a necessary condition for a minimizer $x^*$ of $J$ that:
+It is a necessary condition for a minimizer $x^*$ of $J$ that the first order necessary condition holds and:
 
 \[\nabla^2 J(x^*) \geq 0 \Leftrightarrow A \text{ is positive semi-definite}\]
 
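For reference (assuming, as in the rest of the homework, the quadratic objective $J(x) = \frac{1}{2} x^\top A x - b^\top x$ with $A$ symmetric), the Hessian is constant, which is why the condition reduces to a statement about $A$:

\[\nabla J(x) = Ax - b, \qquad \nabla^2 J(x) = A \quad \text{for all } x.\]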
@@ -87,7 +87,7 @@ It is a sufficient condition for $x^*$ to be a minimizer of $J$ that the first n
 
 Not in general. If, for example, we take $A$ and $b$ to be all zeros, then $J(x) = 0$ for all $x \in \mathbb{R}^n$, and thus $J$ has infinitely many minimizers.
 
-However, for if $A$ would be guaranteed to have full rank, the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank).
+However, if $A$ is guaranteed to be s.p.d., the minimizer is unique because the first order necessary condition holds for only one value $x^*$: the linear system $Ax^* = b$ has one and only one solution ($A$ has full rank), and that solution is the minimizer since the Hessian is positive definite, making $J$ strictly convex.
 
 \section{Exercise 3}
 
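A quick numerical illustration of this uniqueness claim (a minimal sketch with made-up values, assuming the same quadratic $J$):

% Hypothetical s.p.d. example: A \ b is the unique stationary point of J.
A = [2 1; 1 3];             % symmetric with positive eigenvalues, i.e. s.p.d.
b = [1; 2];
xstar = A \ b;              % unique solution of A*x = b
disp(norm(A * xstar - b))   % first order condition residual, ~0
disp(eig(A))                % both eigenvalues > 0, confirming A is s.p.d.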
@@ -150,7 +150,7 @@ Considering $p$, our search direction, as the negative of the gradient (as dicta
 
 To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$.
 
 \[l'(\alpha) = 2 \cdot \langle A (x + \alpha p), p \rangle = 2 \cdot \left( \langle Ax, p \rangle + \alpha \langle Ap, p \rangle \right)\]
-\[l'(\alpha) = 0 \Leftrightarrow \alpha = \frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
+\[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
 
 Since $A$ is s.p.d., $l''(\alpha) = 2 \langle Ap, p \rangle > 0$ for any nonzero $p$; hence the stationary point found above is a minimizer of $l(\alpha)$, and the expression for $\alpha$ above gives the optimal step length for the gradient method.
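A one-step numerical check of the corrected sign of $\alpha$ (a minimal sketch with illustrative values, taking $f(x) = x^\top A x$ so that $\nabla f(x) = 2Ax$):

% Illustrative values; any s.p.d. A and nonzero x behave the same way.
A = [1 0; 0 5];
x = [10; 10];
p = -2 * A * x;                           % steepest descent direction -grad f(x)
alpha = -dot(A * x, p) / dot(A * p, p);   % corrected optimal step
x1 = x + alpha * p;
fprintf("f(x) = %g, f(x1) = %g\n", x'*A*x, x1'*A*x1);   % f strictly decreases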
168 Claudio_Maggioni_1/ex3.asv Normal file
@@ -0,0 +1,168 @@
%% Homework 1 - Optimization Methods
% Author: Claudio Maggioni
%
% Sources:
% - https://www.youtube.com/watch?v=91RZYO1cv_o

clear
clc
close all
format short

colw = 5;
colh = 2;

%% Exercise 3.1

% f(x1, x2) = x1^2 + u * x2^2;

% 1/2 * [x1 x2] [2 0 ] [x1] + [0][x1]
%               [0 2u] [x2] + [0][x2]

% A = [1 0; 0 u]; b = [0; 0]

%% Exercise 3.2
xaxis = -10:0.1:10;
yaxis = xaxis;
Zn = zeros(size(xaxis, 2), size(yaxis, 2));
Zs = {Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn};

for u = 1:10
    A = [1 0; 0 u];

    for i = 1:size(xaxis, 2)
        for j = 1:size(yaxis, 2)
            vec = [xaxis(i); yaxis(j)];
            Zs{u}(i, j) = vec' * A * vec;
        end
    end
end

for u = 1:10
    subplot(colh, colw, u);
    h = surf(xaxis, yaxis, Zs{u});
    set(h,'LineStyle','none');
    title(sprintf("u=%d", u));
end
sgtitle("Surf plots");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './surf.tex')

figure

% max iterations
c = 100;

yi = zeros(30, c);
ni = zeros(30, c);
its = zeros(30, 1);

for u = 1:10
    subplot(colh, colw, u);
    contour(xaxis, yaxis, Zs{u}, 10);
    title(sprintf("u=%d", u));

    %% Exercise 3.3

    A = [2 0; 0 2*u];
    b = [0; 0];
    xs = [[0; 10] [10; 0] [10; 10]];

    syms sx sy
    f = 1/2 * [sx sy] * A * [sx; sy];

    g = gradient(f, [sx; sy]);

    hold on
    j = 1;
    for x0 = xs
        ri = u * 3 - 3 + j;
        x = x0;
        i = 1;

        xi = zeros(2, c);
        xi(:, 1) = x0;

        yi(ri, 1) = subs(f, [sx sy], x0');

        while i <= c
            p = -1 * double(subs(g, [sx sy], x'));

            ni(ri, i) = log10(norm(p, 2));
            if norm(p, 2) == 0 || ni(ri, i) <= -8
                break
            end

            alpha = dot(b - A * x, p) / dot(A * p, p);

            x = x + alpha * p;
            i = i + 1;

            xi(:, i) = x;
            yi(ri, i) = subs(f, [sx sy], x');
        end

        xi = xi(:, 1:i);

        plot(xi(1, :), xi(2, :), '-');

        fprintf("u=%2d x0=[%2d,%2d] it=%2d x=[%d,%d]\n", u, ...
            x0(1), x0(2), i, x(1), x(2));

        its(ri) = i;

        j = j + 1;
    end
    hold off
end
sgtitle("Contour plots and iteration steps");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './contour.tex')

figure

for u = 1:10
    subplot(colh, colw, u);
    title(sprintf("u=%d", u));
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = yi(ri, :);
        vec = vec(1:its(ri));

        plot(1:its(ri), vec);
    end
    hold off
end
sgtitle("Iterations over values of objective function");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './yseries.tex')

figure

for u = 1:10
    subplot(colh, colw, u);
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = ni(ri, :);
        vec = vec(1:its(ri));

        plot(1:its(ri), vec, '-o');
    end
    hold off
    title(sprintf("u=%d", u));
end
sgtitle("Iterations over log10 of gradient norms");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './norms.tex')
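A note on the bookkeeping in ex3.asv (inferred from its loops): the rows of yi, ni and its index the 30 logged runs, three starting points per value of u. A standalone illustration of the row index:

% Hypothetical snippet reproducing the indexing scheme of ex3.asv.
for u = 1:10
    for j = 1:3               % j-th starting point (column of xs)
        ri = u * 3 - 3 + j;   % rows 1..30, grouped three per value of u
    end
end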
@@ -67,12 +67,11 @@ for u = 1:10
 
     %% Exercise 3.3
 
-    A = [2 0; 0 2*u];
-    b = [0; 0];
+    A = [1 0; 0 1*u];
     xs = [[0; 10] [10; 0] [10; 10]];
 
     syms sx sy
-    f = 1/2 * [sx sy] * A * [sx; sy];
+    f = [sx sy] * A * [sx; sy];
 
     g = gradient(f, [sx; sy]);
 
@@ -96,7 +95,7 @@ for u = 1:10
                 break
             end
 
-            alpha = dot(b - A * x, p) / dot(A * p, p);
+            alpha = dot(-A * x, p) / dot(A * p, p);
 
             x = x + alpha * p;
             i = i + 1;
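For clarity, a self-contained sketch of the corrected iteration (an illustrative reimplementation, not the committed script: the real code uses the symbolic toolbox, whereas here the gradient $2Ax$ of $f(x) = x^\top A x$ is hard-coded and the function name is made up):

% steepest_descent: gradient method with exact line search for f(x) = x'*A*x.
function [x, it] = steepest_descent(A, x0, maxit)
    x = x0;
    for it = 1:maxit
        p = -2 * A * x;                           % search direction -grad f(x)
        if norm(p) == 0 || log10(norm(p)) <= -8   % stopping rule from the script
            break
        end
        alpha = dot(-A * x, p) / dot(A * p, p);   % corrected optimal step
        x = x + alpha * p;
    end
end

% Example call: [x, it] = steepest_descent([1 0; 0 5], [10; 10], 100);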