hw1: corrections

Claudio Maggioni (maggicl) 2021-03-25 23:31:47 +01:00
parent 37d800cf67
commit 39a61ac5d5
4 changed files with 174 additions and 7 deletions


@@ -73,7 +73,7 @@ It is a necessary condition for a minimizer $x^*$ of $J$ that:
\subsection{Second order necessary condition}
-It is a necessary condition for a minimizer $x^*$ of $J$ that:
+It is a necessary condition for a minimizer $x^*$ of $J$ that the first order necessary condition holds and:
\[\nabla^2 J(x^*) \geq 0 \Leftrightarrow A \text{ is positive semi-definite}\]
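A quick worked instance (illustrative only, not part of the original hand-in): for
\[A = \begin{pmatrix} 2 & 0 \\ 0 & 0 \end{pmatrix},\]
the eigenvalues of $\nabla^2 J(x^*) = A$ are $2$ and $0$, both non-negative, so $A$ is positive semi-definite and the second order necessary condition holds even though $A$ is singular.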
@@ -87,7 +87,7 @@ It is a sufficient condition for $x^*$ to be a minimizer of $J$ that the first n
Not in general. If, for example, $A$ and $b$ contain only zeros, then $J(x) = 0$ for all $x \in \mathbb{R}^n$, and thus $J$ would have an infinite number of minimizers.
-However, for if $A$ would be guaranteed to have full rank, the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank).
+However, if $A$ is guaranteed to be s.p.d., the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank), and that solution would be the minimizer, since the Hessian $A$ is positive definite and $J$ is therefore strictly convex.
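A minimal MATLAB sketch of this argument (the matrix and vector below are made-up illustrative values, not taken from the hand-in): for an s.p.d. $A$, the stationarity condition $Ax^* = b$ pins down exactly one point.

% Illustrative check with assumed example values: A s.p.d. implies the
% first order condition A*x = b has a unique solution, the minimizer.
A = [2 0; 0 4];    % s.p.d.: symmetric with positive eigenvalues
b = [1; 2];
x_star = A \ b;    % unique solution of the linear system
disp(eig(A)')      % all eigenvalues > 0 => Hessian positive definite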
\section{Exercise 3}
@@ -150,7 +150,7 @@ Considering $p$, our search direction, as the negative of the gradient (as dicta
To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$.
\[l'(\alpha) = 2 \cdot \langle A (x + \alpha p), p \rangle = 2 \cdot \left( \langle Ax, p \rangle + \alpha \langle Ap, p \rangle \right)\]
-\[l'(\alpha) = 0 \Leftrightarrow \alpha = \frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
+\[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
Since $A$ is s.p.d., the second derivative $l''(\alpha) = 2 \langle Ap, p \rangle$ is always positive; therefore the stationary point found above is a minimizer of $l(\alpha)$, and the definition of $\alpha$ given above yields the optimal step length for the gradient method.
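As a sketch, the optimal step derived above translates directly into MATLAB (variable names and values are illustrative, not copied from the course code):

% Exact line search for the quadratic J(x) = 1/2*x'*A*x (illustrative sketch)
A = [2 0; 0 6];      % assumed s.p.d. example matrix
x = [10; 10];        % assumed starting point
p = -A * x;          % steepest descent direction: p = -grad J(x)
alpha = -dot(A * x, p) / dot(A * p, p);   % stationary point of l(alpha)
x_next = x + alpha * p;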

Claudio_Maggioni_1/ex3.asv Normal file

@@ -0,0 +1,168 @@
%% Homework 1 - Optimization Methods
% Author: Claudio Maggioni
%
% Sources:
% - https://www.youtube.com/watch?v=91RZYO1cv_o
clear
clc
close all
format short
% subplot grid layout: 5 columns by 2 rows, one panel per value of u
colw = 5;
colh = 2;
%% Exercise 3.1
% f(x1, x2) = x1^2 + u * x2^2
%    = 1/2 * [x1 x2] * [2 0 ] * [x1] + [0 0] * [x1]
%                      [0 2u]   [x2]           [x2]
% i.e. in the form J(x) = 1/2 * x'*A*x + b'*x:
% A = [2 0; 0 2u]; b = [0; 0]
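% Sanity check of the rewriting above (illustrative; requires the Symbolic
% Math Toolbox, kept commented out so the script runs unchanged):
% syms x1 x2 su
% simplify(1/2 * [x1 x2] * [2 0; 0 2*su] * [x1; x2] - (x1^2 + su*x2^2)) % -> 0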
%% Exercise 3.2
xaxis = -10:0.1:10;
yaxis = xaxis;
Zn = zeros(size(yaxis, 2), size(xaxis, 2));
Zs = {Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn};

for u = 1:10
    A = [1 0; 0 u];
    for i = 1:size(xaxis, 2)
        for j = 1:size(yaxis, 2)
            vec = [xaxis(i); yaxis(j)];
            % surf/contour read Z(row, col) as the value at
            % (x = xaxis(col), y = yaxis(row)), so store at (j, i)
            Zs{u}(j, i) = vec' * A * vec;
        end
    end
end
for u = 1:10
    subplot(colh, colw, u);
    h = surf(xaxis, yaxis, Zs{u});
    set(h, 'LineStyle', 'none');
    title(sprintf("u=%d", u));
end
sgtitle("Surf plots");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './surf.tex')
figure
% max iterations per run
c = 100;
% per-run histories: objective values (yi), log10 gradient norms (ni),
% and iteration counts (its) for the 30 runs (10 values of u x 3 starts)
yi = zeros(30, c);
ni = zeros(30, c);
its = zeros(30, 1);
for u = 1:10
    subplot(colh, colw, u);
    contour(xaxis, yaxis, Zs{u}, 10);
    title(sprintf("u=%d", u));

    %% Exercise 3.3
    A = [2 0; 0 2*u];
    b = [0; 0];
    xs = [[0; 10] [10; 0] [10; 10]];

    syms sx sy
    f = 1/2 * [sx sy] * A * [sx; sy];
    g = gradient(f, [sx; sy]);

    hold on
    j = 1;
    for x0 = xs
        ri = u * 3 - 3 + j;  % row index of this (u, x0) run
        x = x0;
        i = 1;
        xi = zeros(2, c);
        xi(:, 1) = x0;
        yi(ri, 1) = double(subs(f, [sx sy], x0'));
        while i <= c
            % steepest descent: search direction is the negative gradient
            p = -1 * double(subs(g, [sx sy], x'));
            ni(ri, i) = log10(norm(p, 2));
            % stop once the gradient vanishes or its norm drops below 1e-8
            if norm(p, 2) == 0 || ni(ri, i) <= -8
                break
            end
            % exact line search step for the quadratic objective
            alpha = dot(b - A * x, p) / dot(A * p, p);
            x = x + alpha * p;
            i = i + 1;
            xi(:, i) = x;
            yi(ri, i) = double(subs(f, [sx sy], x'));
        end
        xi = xi(:, 1:i);
        plot(xi(1, :), xi(2, :), '-');
        fprintf("u=%2d x0=[%2d,%2d] it=%2d x=[%g,%g]\n", u, ...
            x0(1), x0(2), i, x(1), x(2));
        its(ri) = i;
        j = j + 1;
    end
    hold off
end
sgtitle("Contour plots and iteration steps");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './contour.tex')
figure
for u = 1:10
    subplot(colh, colw, u);
    title(sprintf("u=%d", u));
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = yi(ri, :);
        vec = vec(1:its(ri));
        plot(1:its(ri), vec);
    end
    hold off
end
sgtitle("Iterations over values of objective function");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './yseries.tex')
figure
for u = 1:10
    subplot(colh, colw, u);
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = ni(ri, :);
        vec = vec(1:its(ri));
        plot(1:its(ri), vec, '-o');
    end
    hold off
    title(sprintf("u=%d", u));
end
sgtitle("Iterations over log10 of gradient norms");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './norms.tex')

Claudio_Maggioni_1/ex3.m

@@ -67,12 +67,11 @@ for u = 1:10
%% Exercise 3.3
-A = [2 0; 0 2*u];
-b = [0; 0];
+A = [1 0; 0 1*u];
xs = [[0; 10] [10; 0] [10; 10]];
syms sx sy
-f = 1/2 * [sx sy] * A * [sx; sy];
+f = [sx sy] * A * [sx; sy];
g = gradient(f, [sx; sy]);
@@ -96,7 +95,7 @@ break
break
end
-alpha = dot(b - A * x, p) / dot(A * p, p);
+alpha = dot(-A * x, p) / dot(A * p, p);
x = x + alpha * p;
i = i + 1;
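A short standalone check of the corrected step (assumed example values, independent of the script above): for f(x) = x'*A*x the factor 2 in the gradient cancels in the ratio, so alpha = dot(-A*x, p) / dot(A*p, p) is still the exact line search step.

% Standalone check of the corrected alpha (assumed example values)
A = [1 0; 0 3];          % matches the A = [1 0; 0 1*u] form, with u = 3
x = [10; 10];
p = -2 * A * x;          % gradient of f(x) = x'*A*x is 2*A*x
alpha = dot(-A * x, p) / dot(A * p, p);
x_next = x + alpha * p;  % f(x_next) < f(x) by construction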