hw1: corrections

parent 37d800cf67
commit 39a61ac5d5

4 changed files with 174 additions and 7 deletions
Binary file not shown.
@@ -73,7 +73,7 @@ It is a necessary condition for a minimizer $x^*$ of $J$ that:
 \subsection{Second order necessary condition}
 
-It is a necessary condition for a minimizer $x^*$ of $J$ that:
+It is a necessary condition for a minimizer $x^*$ of $J$ that the first order necessary condition holds and:
 
 \[\nabla^2 J(x^*) \geq 0 \Leftrightarrow A \text{ is positive semi-definite}\]
 
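For reference (assuming, as in the rest of the homework, the quadratic objective $J(x) = \frac{1}{2} x^\top A x - b^\top x$ with $A$ symmetric), the Hessian is constant, which is why the condition reduces to a statement about $A$:

\[\nabla J(x) = Ax - b, \qquad \nabla^2 J(x) = A \quad \text{for all } x.\]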
@@ -87,7 +87,7 @@ It is a sufficient condition for $x^*$ to be a minimizer of $J$ that the first n
 
 Not in general. If, for example, we take $A$ and $b$ to be all zeros, then $J(x) = 0$ for all $x \in \mathbb{R}^n$, and thus $J$ has infinitely many minimizers.
 
-However, for if $A$ would be guaranteed to have full rank, the minimizer would be unique because the first order necessary condition would hold only for one value $x^*$. This is because the linear system $Ax^* = b$ would have one and only one solution (due to $A$ being full rank).
+However, if $A$ is guaranteed to be s.p.d., the minimizer is unique because the first order necessary condition holds for only one value $x^*$: the linear system $Ax^* = b$ has one and only one solution ($A$ has full rank), and that solution is the minimizer since the Hessian is positive definite, making $J$ strictly convex.
 
 \section{Exercise 3}
 
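A quick numerical illustration of this uniqueness claim (a minimal sketch with made-up values, assuming the same quadratic $J$):

% Hypothetical s.p.d. example: A \ b is the unique stationary point of J.
A = [2 1; 1 3];             % symmetric with positive eigenvalues, i.e. s.p.d.
b = [1; 2];
xstar = A \ b;              % unique solution of A*x = b
disp(norm(A * xstar - b))   % first order condition residual, ~0
disp(eig(A))                % both eigenvalues > 0, confirming A is s.p.d.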
@@ -150,7 +150,7 @@ Considering $p$, our search direction, as the negative of the gradient (as dicta
 
 To minimize, we compute the derivative of $l(\alpha)$ and set it to zero to find a stationary point, obtaining a value for $\alpha$ as a function of $A$, $x$ and $p$.
 
 \[l'(\alpha) = 2 \cdot \langle A (x + \alpha p), p \rangle = 2 \cdot \left( \langle Ax, p \rangle + \alpha \langle Ap, p \rangle \right)\]
-\[l'(\alpha) = 0 \Leftrightarrow \alpha = \frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
+\[l'(\alpha) = 0 \Leftrightarrow \alpha = -\frac{\langle Ax, p \rangle}{\langle Ap, p \rangle}\]
 
 Since $A$ is s.p.d., $l''(\alpha) = 2 \langle Ap, p \rangle > 0$ for any nonzero $p$; hence the stationary point found above is a minimizer of $l(\alpha)$, and the expression for $\alpha$ above gives the optimal step length for the gradient method.
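A one-step numerical check of the corrected sign of $\alpha$ (a minimal sketch with illustrative values, taking $f(x) = x^\top A x$ so that $\nabla f(x) = 2Ax$):

% Illustrative values; any s.p.d. A and nonzero x behave the same way.
A = [1 0; 0 5];
x = [10; 10];
p = -2 * A * x;                           % steepest descent direction -grad f(x)
alpha = -dot(A * x, p) / dot(A * p, p);   % corrected optimal step
x1 = x + alpha * p;
fprintf("f(x) = %g, f(x1) = %g\n", x'*A*x, x1'*A*x1);   % f strictly decreases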
168 Claudio_Maggioni_1/ex3.asv Normal file
@@ -0,0 +1,168 @@
%% Homework 1 - Optimization Methods
% Author: Claudio Maggioni
%
% Sources:
% - https://www.youtube.com/watch?v=91RZYO1cv_o

clear
clc
close all
format short

colw = 5;
colh = 2;

%% Exercise 3.1

% f(x1, x2) = x1^2 + u * x2^2;

% 1/2 * [x1 x2] [2 0 ] [x1] + [0][x1]
%               [0 2u] [x2] + [0][x2]

% A = [1 0; 0 u]; b = [0; 0]

%% Exercise 3.2
xaxis = -10:0.1:10;
yaxis = xaxis;
Zn = zeros(size(xaxis, 2), size(yaxis, 2));
Zs = {Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn,Zn};

for u = 1:10
    A = [1 0; 0 u];

    for i = 1:size(xaxis, 2)
        for j = 1:size(yaxis, 2)
            vec = [xaxis(i); yaxis(j)];
            Zs{u}(i, j) = vec' * A * vec;
        end
    end
end

for u = 1:10
    subplot(colh, colw, u);
    h = surf(xaxis, yaxis, Zs{u});
    set(h,'LineStyle','none');
    title(sprintf("u=%d", u));
end
sgtitle("Surf plots");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './surf.tex')

figure

% max iterations
c = 100;

yi = zeros(30, c);
ni = zeros(30, c);
its = zeros(30, 1);

for u = 1:10
    subplot(colh, colw, u);
    contour(xaxis, yaxis, Zs{u}, 10);
    title(sprintf("u=%d", u));

    %% Exercise 3.3

    A = [2 0; 0 2*u];
    b = [0; 0];
    xs = [[0; 10] [10; 0] [10; 10]];

    syms sx sy
    f = 1/2 * [sx sy] * A * [sx; sy];

    g = gradient(f, [sx; sy]);

    hold on
    j = 1;
    for x0 = xs
        ri = u * 3 - 3 + j;
        x = x0;
        i = 1;

        xi = zeros(2, c);
        xi(:, 1) = x0;

        yi(ri, 1) = subs(f, [sx sy], x0');

        while i <= c
            p = -1 * double(subs(g, [sx sy], x'));

            ni(ri, i) = log10(norm(p, 2));
            if norm(p, 2) == 0 || ni(ri, i) <= -8
                break
            end

            alpha = dot(b - A * x, p) / dot(A * p, p);

            x = x + alpha * p;
            i = i + 1;

            xi(:, i) = x;
            yi(ri, i) = subs(f, [sx sy], x');
        end

        xi = xi(:, 1:i);

        plot(xi(1, :), xi(2, :), '-');

        fprintf("u=%2d x0=[%2d,%2d] it=%2d x=[%d,%d]\n", u, ...
            x0(1), x0(2), i, x(1), x(2));

        its(ri) = i;

        j = j + 1;
    end
    hold off
end
sgtitle("Contour plots and iteration steps");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './contour.tex')

figure

for u = 1:10
    subplot(colh, colw, u);
    title(sprintf("u=%d", u));
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = yi(ri, :);
        vec = vec(1:its(ri));

        plot(1:its(ri), vec);
    end
    hold off
end
sgtitle("Iterations over values of objective function");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './yseries.tex')

figure

for u = 1:10
    subplot(colh, colw, u);
    hold on
    for j = 1:3
        ri = u * 3 - 3 + j;
        vec = ni(ri, :);
        vec = vec(1:its(ri));

        plot(1:its(ri), vec, '-o');
    end
    hold off
    title(sprintf("u=%d", u));
end
sgtitle("Iterations over log10 of gradient norms");

% comment these lines on submission
% addpath /home/claudio/git/matlab2tikz/src
% matlab2tikz('showInfo', false, './norms.tex')
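A note on the bookkeeping in ex3.asv (inferred from its loops): the rows of yi, ni and its index the 30 logged runs, three starting points per value of u. A standalone illustration of the row index:

% Hypothetical snippet reproducing the indexing scheme of ex3.asv.
for u = 1:10
    for j = 1:3               % j-th starting point (column of xs)
        ri = u * 3 - 3 + j;   % rows 1..30, grouped three per value of u
    end
end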
@@ -67,12 +67,11 @@ for u = 1:10
 
     %% Exercise 3.3
 
-    A = [2 0; 0 2*u];
-    b = [0; 0];
+    A = [1 0; 0 1*u];
     xs = [[0; 10] [10; 0] [10; 10]];
 
     syms sx sy
-    f = 1/2 * [sx sy] * A * [sx; sy];
+    f = [sx sy] * A * [sx; sy];
 
     g = gradient(f, [sx; sy]);
 
@@ -96,7 +95,7 @@ for u = 1:10
                 break
             end
 
-            alpha = dot(b - A * x, p) / dot(A * p, p);
+            alpha = dot(-A * x, p) / dot(A * p, p);
 
             x = x + alpha * p;
             i = i + 1;
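For clarity, a self-contained sketch of the corrected iteration (an illustrative reimplementation, not the committed script: the real code uses the symbolic toolbox, whereas here the gradient $2Ax$ of $f(x) = x^\top A x$ is hard-coded and the function name is made up):

% steepest_descent: gradient method with exact line search for f(x) = x'*A*x.
function [x, it] = steepest_descent(A, x0, maxit)
    x = x0;
    for it = 1:maxit
        p = -2 * A * x;                           % search direction -grad f(x)
        if norm(p) == 0 || log10(norm(p)) <= -8   % stopping rule from the script
            break
        end
        alpha = dot(-A * x, p) / dot(A * p, p);   % corrected optimal step
        x = x + alpha * p;
    end
end

% Example call: [x, it] = steepest_descent([1 0; 0 5], [10; 10], 100);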