hw1: corrections

Claudio Maggioni (maggicl) 2021-03-25 23:31:47 +01:00
parent 37d800cf67
commit 39a61ac5d5
4 changed files with 174 additions and 7 deletions


@@ -73,7 +73,7 @@ It is a necessary condition for a minimizer $x^*$ of $J$ that:
\subsection{Second order necessary condition}
- It is a necessary condition for a minimizer $x^*$ of $J$ that:
+ It is a necessary condition for a minimizer $x^*$ of $J$ that the first order necessary condition holds and:
\[\nabla^2 J(x^*) \geq 0 \Leftrightarrow A \text{ is positive semi-definite}\]
@@ -87,7 +87,7 @@ It is a sufficient condition for $x^*$ to be a minimizer of $J$ that the first n
Not in general. If, for example, we take $A$ and $b$ to be all zeros, then $J(x) = 0$ for all $x \in \mathbb{R}^n$, and thus $J$ has an infinite number of minimizers.
- However, for if $A$ would be guaranteed to have full rank, the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank).
+ However, if $A$ is guaranteed to be s.p.d., the minimizer is unique, because the first order necessary condition holds for exactly one value $x^*$: the linear system $Ax^* = b$ has one and only one solution ($A$ being s.p.d. implies full rank), and that solution is the minimizer, since the Hessian $A$ is positive definite and $J$ is therefore strictly convex.
\section{Exercise 3}
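
Regarding the uniqueness argument in the hunk above: it is also easy to verify numerically. A minimal MATLAB sketch (the particular s.p.d. $A$ and $b$ are illustrative assumptions, not part of the committed files):

% illustrative uniqueness check for an s.p.d. A
A = [2 1; 1 3];           % symmetric positive definite
b = [1; 2];
xstar = A \ b;            % the unique solution of A*x = b
disp(norm(A * xstar - b)) % gradient norm of J at xstar, prints ~0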
@@ -150,7 +150,7 @@ Considering $p$, our search direction, as the negative of the gradient (as dicta
To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$.
\[l'(\alpha) = 2 \cdot \langle A (x + \alpha p), p \rangle = 2 \cdot \left( \langle Ax, p \rangle + \alpha \langle Ap, p \rangle \right)\]
- \[l'(\alpha) = 0 \Leftrightarrow \alpha = \frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
+ \[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
Since $A$ is s.p.d., the second derivative $l''(\alpha) = 2 \langle Ap, p \rangle$ is always positive, so the stationary point found above is a minimizer of $l(\alpha)$, and the definition of $\alpha$ given above yields the optimal search step for the gradient method.
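
This choice of $\alpha$ can also be checked numerically. A minimal MATLAB sketch (the s.p.d. matrix $A$ and the point $x$ below are illustrative assumptions):

% illustrative check that the derived alpha is a stationary point of l
A = [3 1; 1 2];                          % any s.p.d. matrix
x = [10; 10];
p = -A * x;                              % steepest descent direction
alpha = -dot(A * x, p) / dot(A * p, p);  % the step derived above
disp(2 * dot(A * (x + alpha * p), p))    % l'(alpha), prints ~0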

Claudio_Maggioni_1/ex3.asv Normal file

@@ -0,0 +1,168 @@
%% Homework 1 - Optimization Methods
% Author: Claudio Maggioni
%
% Sources:
% - https://www.youtube.com/watch?v=91RZYO1cv_o
clear
clc
close all
format short
colw = 5; % subplot grid columns
colh = 2; % subplot grid rows
%% Exercise 3.1
% f(x1, x2) = x1^2 + u * x2^2
%           = 1/2 * [x1 x2] * [2 0; 0 2*u] * [x1; x2] + [0 0] * [x1; x2]
% i.e. f(x) = 1/2 * x' * A * x - b' * x with A = [2 0; 0 2*u], b = [0; 0]
% (equivalently f(x) = x' * A * x with A = [1 0; 0 u], as used for the plots)
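% Added sanity check of the derivation above (illustrative snippet, not
% part of the original homework; cx1, cx2, cu are arbitrary symbolic names):
syms cx1 cx2 cu
check = simplify(1/2 * [cx1 cx2] * [2 0; 0 2*cu] * [cx1; cx2] ...
    - (cx1^2 + cu * cx2^2));
assert(isequal(check, sym(0))) % the quadratic form matches f(x1, x2)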
%% Exercise 3.2
xaxis = -10:0.1:10;
yaxis = xaxis;
Zn = zeros(size(xaxis, 2), size(yaxis, 2));
Zs = {Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn}; % one grid of f values per u = 1..10
% evaluate f(x1, x2) = x' * A * x on the grid for each u
for u = 1:10
    A = [1 0; 0 u];
    for i = 1:size(xaxis, 2)
        for j = 1:size(yaxis, 2)
            vec = [xaxis(i); yaxis(j)];
            Zs{u}(i, j) = vec' * A * vec;
        end
    end
end
for u = 1:10
    subplot(colh, colw, u);
    h = surf(xaxis, yaxis, Zs{u});
    set(h, 'LineStyle', 'none');
    title(sprintf("u=%d", u));
end
sgtitle("Surf plots");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './surf.tex')
figure
% max iterations
c = 100;
% 30 rows: one per (u, x0) pair (10 values of u, 3 starting points each)
yi = zeros(30, c);  % objective value at each iteration
ni = zeros(30, c);  % log10 of the gradient norm at each iteration
its = zeros(30, 1); % number of iterations performed
for u = 1:10
    subplot(colh, colw, u);
    contour(xaxis, yaxis, Zs{u}, 10);
    title(sprintf("u=%d", u));

    %% Exercise 3.3
    A = [2 0; 0 2*u];
    b = [0; 0];
    xs = [[0; 10] [10; 0] [10; 10]]; % the three starting points

    syms sx sy
    f = 1/2 * [sx sy] * A * [sx; sy];
    g = gradient(f, [sx; sy]);

    hold on
    j = 1;
    for x0 = xs
        ri = u * 3 - 3 + j; % row index for this (u, x0) pair
        x = x0;
        i = 1;
        xi = zeros(2, c);
        xi(:, 1) = x0;
        yi(ri, 1) = subs(f, [sx sy], x0');

        while i <= c
            % steepest descent direction: the negative gradient at x
            p = -1 * double(subs(g, [sx sy], x'));
            ni(ri, i) = log10(norm(p, 2));
            % stop when the gradient is zero or its norm drops below 1e-8
            if norm(p, 2) == 0 || ni(ri, i) <= -8
                break
            end
            % exact line-search step for the quadratic objective
            alpha = dot(b - A * x, p) / dot(A * p, p);
            x = x + alpha * p;
            i = i + 1;
            xi(:, i) = x;
            yi(ri, i) = subs(f, [sx sy], x');
        end

        xi = xi(:, 1:i);
        plot(xi(1, :), xi(2, :), '-');
        fprintf("u=%2d x0=[%2d,%2d] it=%2d x=[%d,%d]\n", u, ...
            x0(1), x0(2), i, x(1), x(2));
        its(ri) = i;
        j = j + 1;
    end
    hold off
end
sgtitle("Contour plots and iteration steps");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './contour.tex')
figure
for u = 1:10
    subplot(colh, colw, u);
    title(sprintf("u=%d", u));
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = yi(ri, :);
        vec = vec(1:its(ri));
        plot(1:its(ri), vec);
    end
    hold off
end
sgtitle("Iterations over values of objective function");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './yseries.tex')
figure
for u = 1:10
    subplot(colh, colw, u);
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = ni(ri, :);
        vec = vec(1:its(ri));
        plot(1:its(ri), vec, '-o');
    end
    hold off
    title(sprintf("u=%d", u));
end
sgtitle("Iterations over log10 of gradient norms");
% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './norms.tex')


@@ -67,12 +67,11 @@ for u = 1:10
    %% Exercise 3.3
-   A = [2 0; 0 2*u];
-   b = [0; 0];
+   A = [1 0; 0 1*u];
    xs = [[0; 10] [10; 0] [10; 10]];
    syms sx sy
-   f = 1/2 * [sx sy] * A * [sx; sy];
+   f = [sx sy] * A * [sx; sy];
    g = gradient(f, [sx; sy]);
@@ -96,7 +95,7 @@ for u = 1:10
                break
            end
-           alpha = dot(b - A * x, p) / dot(A * p, p);
+           alpha = dot(-A * x, p) / dot(A * p, p);
            x = x + alpha * p;
            i = i + 1;
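
The corrected step is consistent with exact line search: for $f(x) = x^\top A x$ the minimizer of $f(x + \alpha p)$ along $p$ is $\alpha = -\langle Ax, p \rangle / \langle Ap, p \rangle$, which is what the new line computes (the $b$ term disappears since $b = 0$). A minimal standalone MATLAB check, with illustrative values for $u$ and $x$:

% sanity check: the corrected alpha minimizes f along p (illustrative values)
u = 4;
A = [1 0; 0 1*u];
x = [10; 10];
p = -2 * A * x;                          % gradient of x'*A*x is 2*A*x
alpha = dot(-A * x, p) / dot(A * p, p);  % step as in the corrected line
fline = @(a) (x + a * p)' * A * (x + a * p);
% f at alpha should be no larger than at nearby steps
fprintf("%g %g %g\n", fline(alpha - 0.01), fline(alpha), fline(alpha + 0.01))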