From 7355b27cd37fd0e8fe9ba9f8a74367b50dc71729 Mon Sep 17 00:00:00 2001
From: Claudio Maggioni
Date: Tue, 22 Sep 2020 13:34:48 +0200
Subject: [PATCH] Done 5.1, check 5.2 impl (convergence error is a bit high)

---
 mp1/files_data/ex2.m       | 22 ----------
 mp1/files_data/pagerank1.m | 72 ++++++++++++++++++++++++++++++++
 mp1/files_data/pagerank2.m | 84 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 22 deletions(-)
 delete mode 100644 mp1/files_data/ex2.m
 create mode 100644 mp1/files_data/pagerank1.m
 create mode 100644 mp1/files_data/pagerank2.m

diff --git a/mp1/files_data/ex2.m b/mp1/files_data/ex2.m
deleted file mode 100644
index 64b3d02..0000000
--- a/mp1/files_data/ex2.m
+++ /dev/null
@@ -1,22 +0,0 @@
-%[U,G] = surfer('https://www.usi.ch',500);
-% pagerank(U,G);
-
-% A = (1/40) * [1 1 1 35 1 1;
-% 18 1 1 1 1 1;
-% 18 18 1 1 1 1;
-% 1 18 35 1 1 1;
-% 1 1 1 1 1 35;
-% 1 1 1 1 35 1];
-A = (1/40) * [
- 0 0 0 40 0 0;
- 20 0 0 0 0 0;
- 20 20 0 0 0 0;
- 0 20 40 0 0 0;
- 0 0 0 0 0 40;
- 0 0 0 0 40 0];
-
-[v,d] = eig(A);
-
-display(d);
-display(v);
-
diff --git a/mp1/files_data/pagerank1.m b/mp1/files_data/pagerank1.m
new file mode 100644
index 0000000..63d73dd
--- /dev/null
+++ b/mp1/files_data/pagerank1.m
@@ -0,0 +1,72 @@
+function x = pagerank1(U,G,p)
+% PAGERANK1  Google's PageRank, computed with the power method.
+%   pagerank1(U,G,p) uses the URLs and adjacency matrix produced by SURFER,
+%   together with a damping factor p (default is .85), to compute and plot
+%   a bar graph of page rank, and print the dominant URLs in page rank order.
+%   x = pagerank1(U,G,p) returns the page ranks instead of printing.
+%   See also SURFER, SPY.
+
+if nargin < 3, p = .85; end
+
+% Eliminate any self-referential links
+%G = G - diag(diag(G));
+
+% c = out-degree, r = in-degree
+[~,n] = size(G);
+c = sum(G,1);
+r = sum(G,2);
+
+% Scale column sums to be 1 (or 0 where there are no out links).
+k = find(c~=0);
+D = sparse(k,k,1./c(k),n,n);
+
+e = ones(n,1);
+
+% ----------------------------- POWER METHOD ------------------------------
+disp('Using power method implementation');
+
+x = ones(n, 1) * 1/n;
+
+% Apply A = p*G*D + e*z as G*x + e*(z*x), never forming the dense e*z.
+G = p * G * D;
+z = ((1 - p) * (c ~= 0) + (c == 0))/n;
+it = 0;
+
+new_x = x;
+old_norm = -1;
+no = +Inf;
+
+% Iterate until the step norm(new_x - x) stops decreasing; x keeps the
+% iterate preceding the last step.
+while old_norm == -1 || old_norm > no
+    x = new_x;
+    old_norm = no;
+    new_x = G * x + e * (z * x);
+    new_x = new_x/sum(new_x);
+    it = it + 1;
+    no = norm(new_x - x, 2);
+end
+
+% -------------------------------------------------------------------------
+
+% Normalize so that sum(x) == 1.
+x = x/sum(x);
+
+% Bar graph of page rank.
+shg
+bar(x)
+title('Page Rank')
+
+% Print URLs in page rank order.
+
+if nargout < 1
+    [~,q] = sort(-x);
+    disp(' page-rank in out url')
+    k = 1;
+    maxN = length(U);
+    while (k <= maxN) && (x(q(k)) >= .005)
+        j = q(k);
+        fprintf(' %3.0f %8.4f %4.0f %4.0f %s\n', j,x(j),full(r(j)),full(c(j)),U{j});
+        k = k+1;
+    end
+end
diff --git a/mp1/files_data/pagerank2.m b/mp1/files_data/pagerank2.m
new file mode 100644
index 0000000..3d4ced6
--- /dev/null
+++ b/mp1/files_data/pagerank2.m
@@ -0,0 +1,84 @@
+function x = pagerank2(U,G,p)
+% PAGERANK2  Google's PageRank, computed with inverse iteration.
+%   pagerank2(U,G,p) uses the URLs and adjacency matrix produced by SURFER,
+%   together with a damping factor p (default is .85), to compute and plot
+%   a bar graph of page rank, and print the dominant URLs in page rank order.
+%   x = pagerank2(U,G,p) returns the page ranks instead of printing.
+%   See also SURFER, SPY.
+
+if nargin < 3, p = .85; end
+
+% Eliminate any self-referential links
+%G = G - diag(diag(G));
+
+% c = out-degree, r = in-degree
+[~,n] = size(G);
+c = sum(G,1);
+r = sum(G,2);
+
+% Scale column sums to be 1 (or 0 where there are no out links).
+k = find(c~=0);
+D = sparse(k,k,1./c(k),n,n);
+
+e = ones(n,1);
+I = speye(n,n);
+
+% ---------------------------- INVERSE ITERATION --------------------------
+disp('Using inverse iteration implementation');
+
+z = ((1 - p) * (c ~= 0) + (c == 0)) / n;
+A = p * G * D + e * z;
+x = e/n;
+
+% Raise the shift alpha until A - alpha*I is numerically nonsingular.
+alpha = 1;
+while rcond(A - alpha * I) <= eps
+    alpha = alpha + 0.01;
+end
+display(alpha);
+
+B = (A - alpha * I);
+
+old_x = zeros(n, 1);
+old_norm = -1;
+no = +Inf;
+
+it = 0;
+while old_norm == -1 || old_norm > no
+    old_norm = no;
+    old_x = x;
+    x = B \ x;
+    % For alpha > 1 the eigenvalue 1/(1 - alpha) of inv(B) is negative, so
+    % the iterate flips sign each step; dividing by sum(x) removes the flip
+    % (a 2-norm scaling would keep norm(x - old_x) near 2).
+    x = x/sum(x);
+    it = it + 1;
+    no = norm(x - old_x, 2);
+end
+
+x = old_x;
+display(it);
+
+% -------------------------------------------------------------------------
+
+% Normalize so that sum(x) == 1.
+x = x/sum(x);
+
+% Bar graph of page rank.
+shg
+bar(x)
+title('Page Rank')
+
+% Print URLs in page rank order.
+
+if nargout < 1
+    [~,q] = sort(-x);
+    disp(' page-rank in out url')
+    k = 1;
+    maxN = length(U);
+    while (k <= maxN) && (x(q(k)) >= .005)
+        j = q(k);
+        fprintf(' %3.0f %8.4f %4.0f %4.0f %s\n', j,x(j),full(r(j)),full(c(j)),U{j});
+        k = k+1;
+    end
+end