From 8013c535e6a6e410e435d8560ad182136dfab296 Mon Sep 17 00:00:00 2001 From: Claudio Maggioni Date: Mon, 3 Oct 2022 21:44:34 +0200 Subject: [PATCH] hw1: small allocation fix on ex2 --- .../matmult/dgemm-blocked.c | 4 +- .../matmult/timing.ps | 296 +++++++++--------- .../matmult/timing_basic_dgemm.data | 54 ++-- .../matmult/timing_blas_dgemm.data | 54 ++-- .../matmult/timing_blocked_dgemm.data | 54 ++-- 5 files changed, 231 insertions(+), 231 deletions(-) diff --git a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c index 78d5859..5fbd9f2 100644 --- a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c +++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c @@ -17,7 +17,7 @@ LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKL const char* dgemm_desc = "Block-based dgemm."; -const int block_size = 27; +const int block_size = 26; inline int min(int a, int b) { return a < b ? a : b; @@ -45,10 +45,10 @@ inline void store_c(double* C, double* C_temp, int r_min, int r_max, int c_min, * On exit, A and B maintain their input values. */ void square_dgemm(int n, double* A, double* B, double* C) { double A_row[n * n]; + double row_tmp[n]; double C_temp[block_size * block_size]; for (int m = 0; m < n; ++m) { - double row_tmp[n]; memcpy(row_tmp, A + m * n, n * sizeof(double)); for (int l = 0; l < n; ++l) { diff --git a/Project1/project_1_maggioni_claudio/matmult/timing.ps b/Project1/project_1_maggioni_claudio/matmult/timing.ps index 1c50bb8..31bc716 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing.ps +++ b/Project1/project_1_maggioni_claudio/matmult/timing.ps @@ -1,7 +1,7 @@ %!PS-Adobe-2.0 %%Title: timing.ps %%Creator: gnuplot 5.2 patchlevel 8 -%%CreationDate: Mon Oct 3 21:19:18 2022 +%%CreationDate: Mon Oct 3 21:44:11 2022 %%DocumentFonts: (atend) %%BoundingBox: 50 50 554 770 %%Orientation: Landscape @@ -483,7 +483,7 @@ SDict begin [ /Creator (gnuplot 5.2 patchlevel 8) % /Producer (gnuplot) % /Keywords () - /CreationDate (Mon Oct 3 21:19:18 2022) + /CreationDate (Mon Oct 3 21:44:11 2022) /DOCINFO pdfmark end } ifelse @@ -961,57 +961,57 @@ LTb 6380 4486 M 399 0 V 1171 2365 M -7 -1 V -481 -108 V -8 -1 V -225 -22 V -7 -72 V -8 71 V -466 -51 V -7 -29 V -278 23 V -195 0 V -8 -472 V -7 469 V -466 -6 V -8 -69 V -7 77 V -721 -2 V -466 -5 V -7 -46 V -233 45 V -8 -473 V -954 464 V -7 -451 V -954 450 V -8 -550 V -7 546 V +7 2 V +481 -111 V +8 0 V +225 -23 V +7 -63 V +8 57 V +466 -55 V +7 -15 V +278 7 V +195 2 V +8 -480 V +7 483 V +466 -9 V +8 -112 V +7 125 V +721 -3 V +466 0 V +7 -19 V +233 20 V +8 -480 V +954 473 V +7 -573 V +954 564 V +8 -457 V +7 460 V 1171 2365 Pls -1178 2364 Pls +1178 2367 Pls 1659 2256 Pls -1667 2255 Pls +1667 2256 Pls 1892 2233 Pls -1899 2161 Pls -1907 2232 Pls -2373 2181 Pls -2380 2152 Pls -2658 2175 Pls -2853 2175 Pls -2861 1703 Pls -2868 2172 Pls -3334 2166 Pls -3342 2097 Pls -3349 2174 Pls -4070 2172 Pls -4536 2167 Pls -4543 2121 Pls -4776 2166 Pls -4784 1693 Pls -5738 2157 Pls -5745 1706 Pls -6699 2156 Pls -6707 1606 Pls -6714 2152 Pls +1899 2170 Pls +1907 2227 Pls +2373 2172 Pls +2380 2157 Pls +2658 2164 Pls +2853 2166 Pls +2861 1686 Pls +2868 2169 Pls +3334 2160 Pls +3342 2048 Pls +3349 2173 Pls +4070 2170 Pls +4536 2170 Pls +4543 2151 Pls +4776 2171 Pls +4784 1691 Pls +5738 2164 Pls +5745 1591 Pls +6699 2155 Pls +6707 1698 Pls +6714 2158 Pls 6579 4486 Pls % End plot #1 % Begin plot #2 @@ -1030,58 +1030,58 @@ LTb 0.00 0.62 0.45 C 6380 4346 M 399 0 V -1171 2307 M -7 13 V -481 34 V -8 4 V -225 4 V -7 -53 V -8 21 V -466 17 V -7 4 V -278 -3 V -195 16 V -8 -17 V -7 1 V -466 -7 V -8 23 V -7 5 V -721 -1 V -466 -24 V -7 13 V -233 9 V -8 2 V -954 -1 V -7 2 V -954 -4 V -8 -9 V -7 4 V -1171 2307 Crs -1178 2320 Crs -1659 2354 Crs -1667 2358 Crs -1892 2362 Crs -1899 2309 Crs -1907 2330 Crs -2373 2347 Crs -2380 2351 Crs -2658 2348 Crs -2853 2364 Crs -2861 2347 Crs -2868 2348 Crs -3334 2341 Crs -3342 2364 Crs -3349 2369 Crs -4070 2368 Crs -4536 2344 Crs -4543 2357 Crs -4776 2366 Crs -4784 2368 Crs -5738 2367 Crs -5745 2369 Crs -6699 2365 Crs -6707 2356 Crs -6714 2360 Crs +1171 2290 M +7 21 V +481 37 V +8 -1 V +225 2 V +7 -12 V +8 13 V +466 3 V +7 0 V +278 2 V +195 -5 V +8 -7 V +7 6 V +466 8 V +8 1 V +7 0 V +721 3 V +466 -38 V +7 -12 V +233 10 V +8 16 V +954 2 V +7 -7 V +954 16 V +8 10 V +7 -11 V +1171 2290 Crs +1178 2311 Crs +1659 2348 Crs +1667 2347 Crs +1892 2349 Crs +1899 2337 Crs +1907 2350 Crs +2373 2353 Crs +2380 2353 Crs +2658 2355 Crs +2853 2350 Crs +2861 2343 Crs +2868 2349 Crs +3334 2357 Crs +3342 2358 Crs +3349 2358 Crs +4070 2361 Crs +4536 2323 Crs +4543 2311 Crs +4776 2321 Crs +4784 2337 Crs +5738 2339 Crs +5745 2332 Crs +6699 2348 Crs +6707 2358 Crs +6714 2347 Crs 6579 4346 Crs % End plot #2 % Begin plot #3 @@ -1100,58 +1100,58 @@ LTb 0.34 0.71 0.91 C 6380 4206 M 399 0 V -1171 3798 M -7 71 V -481 76 V -8 -23 V -225 -32 V -7 49 V -8 -22 V -466 14 V -7 57 V -278 -14 V -195 -12 V -8 21 V -7 -22 V +1171 3743 M +7 112 V +481 86 V +8 -53 V +225 -26 V +7 81 V +8 -27 V +466 20 V +7 10 V +278 28 V +195 -16 V +8 0 V +7 4 V 466 12 V 8 35 V 7 -16 V -721 8 V -466 4 V -7 29 V -233 -47 V -8 30 V -954 1 V -7 -5 V -954 -15 V +721 11 V +466 -55 V +7 74 V +233 -53 V 8 41 V -7 -22 V -1171 3798 Star -1178 3869 Star -1659 3945 Star -1667 3922 Star -1892 3890 Star -1899 3939 Star -1907 3917 Star -2373 3931 Star -2380 3988 Star +954 -22 V +7 22 V +954 -14 V +8 -64 V +7 17 V +1171 3743 Star +1178 3855 Star +1659 3941 Star +1667 3888 Star +1892 3862 Star +1899 3943 Star +1907 3916 Star +2373 3936 Star +2380 3946 Star 2658 3974 Star -2853 3962 Star -2861 3983 Star -2868 3961 Star -3334 3973 Star -3342 4008 Star -3349 3992 Star -4070 4000 Star -4536 4004 Star -4543 4033 Star -4776 3986 Star -4784 4016 Star -5738 4017 Star -5745 4012 Star +2853 3958 Star +2861 3958 Star +2868 3962 Star +3334 3974 Star +3342 4009 Star +3349 3993 Star +4070 4004 Star +4536 3949 Star +4543 4023 Star +4776 3970 Star +4784 4011 Star +5738 3989 Star +5745 4011 Star 6699 3997 Star -6707 4038 Star -6714 4016 Star +6707 3933 Star +6714 3950 Star 6579 4206 Star % End plot #3 2.000 UL diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data index 2adf2b0..a0f2fb5 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data @@ -1,29 +1,29 @@ #Description: Naive, three-loop dgemm. -Size: 31 Mflop/s: 2392.32 Percentage: 6.50 -Size: 32 Mflop/s: 2389.31 Percentage: 6.49 -Size: 96 Mflop/s: 1996.82 Percentage: 5.43 -Size: 97 Mflop/s: 1995.22 Percentage: 5.42 -Size: 127 Mflop/s: 1923.52 Percentage: 5.23 -Size: 128 Mflop/s: 1707.73 Percentage: 4.64 -Size: 129 Mflop/s: 1917.74 Percentage: 5.21 -Size: 191 Mflop/s: 1763.08 Percentage: 4.79 -Size: 192 Mflop/s: 1681.61 Percentage: 4.57 -Size: 229 Mflop/s: 1746.72 Percentage: 4.75 -Size: 255 Mflop/s: 1745.99 Percentage: 4.74 -Size: 256 Mflop/s: 799.816 Percentage: 2.17 -Size: 257 Mflop/s: 1737 Percentage: 4.72 -Size: 319 Mflop/s: 1721.21 Percentage: 4.68 -Size: 320 Mflop/s: 1535.18 Percentage: 4.17 -Size: 321 Mflop/s: 1743.01 Percentage: 4.74 -Size: 417 Mflop/s: 1737.5 Percentage: 4.72 -Size: 479 Mflop/s: 1724.83 Percentage: 4.69 -Size: 480 Mflop/s: 1597.39 Percentage: 4.34 -Size: 511 Mflop/s: 1720.23 Percentage: 4.67 -Size: 512 Mflop/s: 785.527 Percentage: 2.13 -Size: 639 Mflop/s: 1694.76 Percentage: 4.61 -Size: 640 Mflop/s: 803.077 Percentage: 2.18 -Size: 767 Mflop/s: 1692.02 Percentage: 4.60 -Size: 768 Mflop/s: 680.299 Percentage: 1.85 -Size: 769 Mflop/s: 1679.99 Percentage: 4.57 -#Average percentage of Peak = 4.48494 +Size: 31 Mflop/s: 2393.33 Percentage: 6.50 +Size: 32 Mflop/s: 2400.13 Percentage: 6.52 +Size: 96 Mflop/s: 1998.74 Percentage: 5.43 +Size: 97 Mflop/s: 1996.01 Percentage: 5.42 +Size: 127 Mflop/s: 1923.81 Percentage: 5.23 +Size: 128 Mflop/s: 1731.98 Percentage: 4.71 +Size: 129 Mflop/s: 1903.31 Percentage: 5.17 +Size: 191 Mflop/s: 1736.78 Percentage: 4.72 +Size: 192 Mflop/s: 1694.44 Percentage: 4.60 +Size: 229 Mflop/s: 1715.1 Percentage: 4.66 +Size: 255 Mflop/s: 1720.39 Percentage: 4.67 +Size: 256 Mflop/s: 777.65 Percentage: 2.11 +Size: 257 Mflop/s: 1729.27 Percentage: 4.70 +Size: 319 Mflop/s: 1704.8 Percentage: 4.63 +Size: 320 Mflop/s: 1414.84 Percentage: 3.84 +Size: 321 Mflop/s: 1741.3 Percentage: 4.73 +Size: 417 Mflop/s: 1733 Percentage: 4.71 +Size: 479 Mflop/s: 1731.17 Percentage: 4.70 +Size: 480 Mflop/s: 1678.77 Percentage: 4.56 +Size: 511 Mflop/s: 1733.6 Percentage: 4.71 +Size: 512 Mflop/s: 782.96 Percentage: 2.13 +Size: 639 Mflop/s: 1714.42 Percentage: 4.66 +Size: 640 Mflop/s: 663.418 Percentage: 1.80 +Size: 767 Mflop/s: 1690.82 Percentage: 4.59 +Size: 768 Mflop/s: 792.043 Percentage: 2.15 +Size: 769 Mflop/s: 1696.95 Percentage: 4.61 +#Average percentage of Peak = 4.47314 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data index dee9381..ac799da 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data @@ -1,29 +1,29 @@ #Description: Reference dgemm. -Size: 31 Mflop/s: 25682 Percentage: 69.79 -Size: 32 Mflop/s: 28883 Percentage: 78.49 -Size: 96 Mflop/s: 32725.3 Percentage: 88.93 -Size: 97 Mflop/s: 31538.2 Percentage: 85.70 -Size: 127 Mflop/s: 29895.3 Percentage: 81.24 -Size: 128 Mflop/s: 32447.3 Percentage: 88.17 -Size: 129 Mflop/s: 31273.9 Percentage: 84.98 -Size: 191 Mflop/s: 32026.4 Percentage: 87.03 -Size: 192 Mflop/s: 35140.4 Percentage: 95.49 -Size: 229 Mflop/s: 34375.4 Percentage: 93.41 -Size: 255 Mflop/s: 33708.5 Percentage: 91.60 -Size: 256 Mflop/s: 34891.4 Percentage: 94.81 -Size: 257 Mflop/s: 33617.2 Percentage: 91.35 -Size: 319 Mflop/s: 34283 Percentage: 93.16 -Size: 320 Mflop/s: 36365.1 Percentage: 98.82 -Size: 321 Mflop/s: 35399.5 Percentage: 96.19 -Size: 417 Mflop/s: 35893.2 Percentage: 97.54 -Size: 479 Mflop/s: 36084.1 Percentage: 98.05 -Size: 480 Mflop/s: 37868.8 Percentage:102.90 -Size: 511 Mflop/s: 35047.8 Percentage: 95.24 -Size: 512 Mflop/s: 36868 Percentage:100.18 -Size: 639 Mflop/s: 36890.8 Percentage:100.25 -Size: 640 Mflop/s: 36603 Percentage: 99.46 -Size: 767 Mflop/s: 35669.9 Percentage: 96.93 -Size: 768 Mflop/s: 38223.9 Percentage:103.87 -Size: 769 Mflop/s: 36856.3 Percentage:100.15 -#Average percentage of Peak = 92.8363 +Size: 31 Mflop/s: 23449.2 Percentage: 63.72 +Size: 32 Mflop/s: 28198.9 Percentage: 76.63 +Size: 96 Mflop/s: 32542.3 Percentage: 88.43 +Size: 97 Mflop/s: 29801.3 Percentage: 80.98 +Size: 127 Mflop/s: 28557.8 Percentage: 77.60 +Size: 128 Mflop/s: 32643.3 Percentage: 88.70 +Size: 129 Mflop/s: 31198.2 Percentage: 84.78 +Size: 191 Mflop/s: 32247.3 Percentage: 87.63 +Size: 192 Mflop/s: 32830.6 Percentage: 89.21 +Size: 229 Mflop/s: 34360.9 Percentage: 93.37 +Size: 255 Mflop/s: 33477.7 Percentage: 90.97 +Size: 256 Mflop/s: 33473.9 Percentage: 90.96 +Size: 257 Mflop/s: 33686.5 Percentage: 91.54 +Size: 319 Mflop/s: 34335.2 Percentage: 93.30 +Size: 320 Mflop/s: 36438.1 Percentage: 99.02 +Size: 321 Mflop/s: 35433.7 Percentage: 96.29 +Size: 417 Mflop/s: 36133.7 Percentage: 98.19 +Size: 479 Mflop/s: 32951.4 Percentage: 89.54 +Size: 480 Mflop/s: 37260 Percentage:101.25 +Size: 511 Mflop/s: 34128 Percentage: 92.74 +Size: 512 Mflop/s: 36526.4 Percentage: 99.26 +Size: 639 Mflop/s: 35249.2 Percentage: 95.79 +Size: 640 Mflop/s: 36538.7 Percentage: 99.29 +Size: 767 Mflop/s: 35718.5 Percentage: 97.06 +Size: 768 Mflop/s: 32116.8 Percentage: 87.27 +Size: 769 Mflop/s: 33033.9 Percentage: 89.77 +#Average percentage of Peak = 90.1266 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data index 4748042..6a720f3 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data @@ -1,29 +1,29 @@ #Description: Block-based dgemm. -Size: 31 Mflop/s: 2174.53 Percentage: 5.91 -Size: 32 Mflop/s: 2219.23 Percentage: 6.03 -Size: 96 Mflop/s: 2347.86 Percentage: 6.38 -Size: 97 Mflop/s: 2366.05 Percentage: 6.43 -Size: 127 Mflop/s: 2380.58 Percentage: 6.47 -Size: 128 Mflop/s: 2180.15 Percentage: 5.92 -Size: 129 Mflop/s: 2257.65 Percentage: 6.13 -Size: 191 Mflop/s: 2323.3 Percentage: 6.31 -Size: 192 Mflop/s: 2338.71 Percentage: 6.36 -Size: 229 Mflop/s: 2327.57 Percentage: 6.32 -Size: 255 Mflop/s: 2389.61 Percentage: 6.49 -Size: 256 Mflop/s: 2321.28 Percentage: 6.31 -Size: 257 Mflop/s: 2327.47 Percentage: 6.32 -Size: 319 Mflop/s: 2297.83 Percentage: 6.24 -Size: 320 Mflop/s: 2386.76 Percentage: 6.49 -Size: 321 Mflop/s: 2408.12 Percentage: 6.54 -Size: 417 Mflop/s: 2405.69 Percentage: 6.54 -Size: 479 Mflop/s: 2311.13 Percentage: 6.28 -Size: 480 Mflop/s: 2359.66 Percentage: 6.41 -Size: 511 Mflop/s: 2396.08 Percentage: 6.51 -Size: 512 Mflop/s: 2403.92 Percentage: 6.53 -Size: 639 Mflop/s: 2401.71 Percentage: 6.53 -Size: 640 Mflop/s: 2408.37 Percentage: 6.54 -Size: 767 Mflop/s: 2394.15 Percentage: 6.51 -Size: 768 Mflop/s: 2358.31 Percentage: 6.41 -Size: 769 Mflop/s: 2372.9 Percentage: 6.45 -#Average percentage of Peak = 6.36064 +Size: 31 Mflop/s: 2112.63 Percentage: 5.74 +Size: 32 Mflop/s: 2187.44 Percentage: 5.94 +Size: 96 Mflop/s: 2325.39 Percentage: 6.32 +Size: 97 Mflop/s: 2322.81 Percentage: 6.31 +Size: 127 Mflop/s: 2330.3 Percentage: 6.33 +Size: 128 Mflop/s: 2282.93 Percentage: 6.20 +Size: 129 Mflop/s: 2334.25 Percentage: 6.34 +Size: 191 Mflop/s: 2345.91 Percentage: 6.37 +Size: 192 Mflop/s: 2345.38 Percentage: 6.37 +Size: 229 Mflop/s: 2351.01 Percentage: 6.39 +Size: 255 Mflop/s: 2335.21 Percentage: 6.35 +Size: 256 Mflop/s: 2306.48 Percentage: 6.27 +Size: 257 Mflop/s: 2330.68 Percentage: 6.33 +Size: 319 Mflop/s: 2360.03 Percentage: 6.41 +Size: 320 Mflop/s: 2364.53 Percentage: 6.43 +Size: 321 Mflop/s: 2366.38 Percentage: 6.43 +Size: 417 Mflop/s: 2378.34 Percentage: 6.46 +Size: 479 Mflop/s: 2233.05 Percentage: 6.07 +Size: 480 Mflop/s: 2187.87 Percentage: 5.95 +Size: 511 Mflop/s: 2224.61 Percentage: 6.05 +Size: 512 Mflop/s: 2284.85 Percentage: 6.21 +Size: 639 Mflop/s: 2292.78 Percentage: 6.23 +Size: 640 Mflop/s: 2264.7 Percentage: 6.15 +Size: 767 Mflop/s: 2324.83 Percentage: 6.32 +Size: 768 Mflop/s: 2363.92 Percentage: 6.42 +Size: 769 Mflop/s: 2321.31 Percentage: 6.31 +#Average percentage of Peak = 6.25811