From 980eb5f0b9edfd63031518d9f2f82565c41dd740 Mon Sep 17 00:00:00 2001 From: Claudio Maggioni Date: Wed, 12 Oct 2022 11:02:19 +0200 Subject: [PATCH] hw1: compiler flags --- .../matmult/Makefile | 4 +- .../matmult/dgemm-blocked.c | 50 ++- .../matmult/timing.ps | 306 +++++++++--------- .../matmult/timing_basic_dgemm.data | 54 ++-- .../matmult/timing_blas_dgemm.data | 54 ++-- .../matmult/timing_blocked_dgemm.data | 54 ++-- 6 files changed, 259 insertions(+), 263 deletions(-) diff --git a/Project1/project_1_maggioni_claudio/matmult/Makefile b/Project1/project_1_maggioni_claudio/matmult/Makefile index 1f22bd7..2a7d1f8 100755 --- a/Project1/project_1_maggioni_claudio/matmult/Makefile +++ b/Project1/project_1_maggioni_claudio/matmult/Makefile @@ -3,8 +3,8 @@ # CC = gcc -OPT = -O2 -CFLAGS = -Wall -std=gnu99 $(OPT) +OPT = -O3 +CFLAGS = -Wall -std=gnu99 -march=haswell -ffast-math $(OPT) LDFLAGS = -Wall # librt is needed for clock_gettime LDLIBS = -lrt -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm -ldl -m64 -I${MKLROOT}/include diff --git a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c index 5fbd9f2..e962489 100644 --- a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c +++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c @@ -15,29 +15,11 @@ LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKL */ +#define MIN(a,b) (((a)<(b))?(a):(b)) + const char* dgemm_desc = "Block-based dgemm."; -const int block_size = 26; - -inline int min(int a, int b) { - return a < b ? a : b; -} - -inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, double* A_row, double* B, double* C_temp) { - for (int i = r_min, ii = 0; i < r_max; ++i, ++ii) { - for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) { - for (int k = k_min; k < k_max; k++) { - C_temp[ii + jj * block_size] += A_row[i * n + k] * B[k + j * n]; - } - } - } -} - -inline void store_c(double* C, double* C_temp, int r_min, int r_max, int c_min, int c_max, int n) { - for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) { - memcpy(C + j * n + r_min, C_temp + jj * block_size, (r_max - r_min) * sizeof(double)); - } -} +int block_size = 32; /* This routine performs a dgemm operation * C := C + A * B @@ -57,19 +39,33 @@ void square_dgemm(int n, double* A, double* B, double* C) { } for (int i = 0; i < n; i += block_size) { - int i_next = min(i + block_size, n); + int i_next = MIN(i + block_size, n); for (int j = 0; j < n; j += block_size) { - int j_next = min(j + block_size, n); - + int j_next = MIN(j + block_size, n); + + // clear matrix C_temp memset(C_temp, 0, block_size * block_size * sizeof(double)); for (int k = 0; k < n; k += block_size) { - int k_next = min(k + block_size, n); - naivemm(i, i_next, k, k_next, j, j_next, n, A_row, B, C_temp); + int k_next = MIN(k + block_size, n); + + // begin naivemm + for (int i2 = i, ii2 = 0; i2 < i_next; ++i2, ++ii2) { + for (int j2 = j, jj2 = 0; j2 < j_next; ++j2, ++jj2) { + for (int k2 = k; k2 < k_next; k2++) { + C_temp[ii2 + jj2 * block_size] += A_row[i2 * n + k2] * B[k2 + j2 * n]; + } + } + } + // end naivemm } - store_c(C, C_temp, i, i_next, j, j_next, n); + // store C_temp in C + for (int j2 = j, jj2 = 0; j2 < j_next; ++j2, ++jj2) { + memcpy(C + j2 * n + i, C_temp + jj2 * block_size, (i_next - i) * sizeof(double)); + } + // end store C_temp } } } diff --git a/Project1/project_1_maggioni_claudio/matmult/timing.ps b/Project1/project_1_maggioni_claudio/matmult/timing.ps index 31bc716..a8438c8 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing.ps +++ b/Project1/project_1_maggioni_claudio/matmult/timing.ps @@ -1,7 +1,7 @@ %!PS-Adobe-2.0 %%Title: timing.ps %%Creator: gnuplot 5.2 patchlevel 8 -%%CreationDate: Mon Oct 3 21:44:11 2022 +%%CreationDate: Wed Oct 12 11:03:04 2022 %%DocumentFonts: (atend) %%BoundingBox: 50 50 554 770 %%Orientation: Landscape @@ -483,7 +483,7 @@ SDict begin [ /Creator (gnuplot 5.2 patchlevel 8) % /Producer (gnuplot) % /Keywords () - /CreationDate (Mon Oct 3 21:44:11 2022) + /CreationDate (Wed Oct 12 11:03:04 2022) /DOCINFO pdfmark end } ifelse @@ -960,58 +960,58 @@ LTb 0.58 0.00 0.83 C 6380 4486 M 399 0 V -1171 2365 M -7 2 V -481 -111 V -8 0 V -225 -23 V -7 -63 V -8 57 V -466 -55 V -7 -15 V -278 7 V -195 2 V -8 -480 V -7 483 V -466 -9 V -8 -112 V -7 125 V -721 -3 V -466 0 V -7 -19 V -233 20 V -8 -480 V -954 473 V -7 -573 V -954 564 V -8 -457 V -7 460 V -1171 2365 Pls -1178 2367 Pls -1659 2256 Pls -1667 2256 Pls -1892 2233 Pls -1899 2170 Pls -1907 2227 Pls -2373 2172 Pls -2380 2157 Pls -2658 2164 Pls -2853 2166 Pls -2861 1686 Pls -2868 2169 Pls -3334 2160 Pls -3342 2048 Pls -3349 2173 Pls -4070 2170 Pls -4536 2170 Pls -4543 2151 Pls -4776 2171 Pls -4784 1691 Pls -5738 2164 Pls -5745 1591 Pls -6699 2155 Pls -6707 1698 Pls -6714 2158 Pls +1171 2529 M +7 42 V +481 -132 V +8 6 V +225 -40 V +7 -211 V +8 237 V +466 -92 V +7 -145 V +278 126 V +195 -12 V +8 -598 V +7 615 V +466 -18 V +8 -180 V +7 180 V +721 -13 V +466 -6 V +7 -79 V +233 81 V +8 -586 V +954 516 V +7 -505 V +954 487 V +8 -489 V +7 489 V +1171 2529 Pls +1178 2571 Pls +1659 2439 Pls +1667 2445 Pls +1892 2405 Pls +1899 2194 Pls +1907 2431 Pls +2373 2339 Pls +2380 2194 Pls +2658 2320 Pls +2853 2308 Pls +2861 1710 Pls +2868 2325 Pls +3334 2307 Pls +3342 2127 Pls +3349 2307 Pls +4070 2294 Pls +4536 2288 Pls +4543 2209 Pls +4776 2290 Pls +4784 1704 Pls +5738 2220 Pls +5745 1715 Pls +6699 2202 Pls +6707 1713 Pls +6714 2202 Pls 6579 4486 Pls % End plot #1 % Begin plot #2 @@ -1030,58 +1030,58 @@ LTb 0.00 0.62 0.45 C 6380 4346 M 399 0 V -1171 2290 M -7 21 V -481 37 V -8 -1 V -225 2 V -7 -12 V -8 13 V -466 3 V -7 0 V -278 2 V -195 -5 V -8 -7 V -7 6 V -466 8 V -8 1 V -7 0 V -721 3 V -466 -38 V -7 -12 V -233 10 V +1171 2651 M +7 199 V +481 31 V +8 -101 V +225 27 V +7 -19 V +8 -29 V +466 43 V +7 71 V +278 -65 V +195 -58 V +8 17 V +7 -19 V +466 73 V +8 66 V +7 -64 V +721 4 V +466 1 V +7 62 V +233 -146 V +8 15 V +954 71 V +7 8 V +954 -65 V 8 16 V -954 2 V -7 -7 V -954 16 V -8 10 V -7 -11 V -1171 2290 Crs -1178 2311 Crs -1659 2348 Crs -1667 2347 Crs -1892 2349 Crs -1899 2337 Crs -1907 2350 Crs -2373 2353 Crs -2380 2353 Crs -2658 2355 Crs -2853 2350 Crs -2861 2343 Crs -2868 2349 Crs -3334 2357 Crs -3342 2358 Crs -3349 2358 Crs -4070 2361 Crs -4536 2323 Crs -4543 2311 Crs -4776 2321 Crs -4784 2337 Crs -5738 2339 Crs -5745 2332 Crs -6699 2348 Crs -6707 2358 Crs -6714 2347 Crs +7 -18 V +1171 2651 Crs +1178 2850 Crs +1659 2881 Crs +1667 2780 Crs +1892 2807 Crs +1899 2788 Crs +1907 2759 Crs +2373 2802 Crs +2380 2873 Crs +2658 2808 Crs +2853 2750 Crs +2861 2767 Crs +2868 2748 Crs +3334 2821 Crs +3342 2887 Crs +3349 2823 Crs +4070 2827 Crs +4536 2828 Crs +4543 2890 Crs +4776 2744 Crs +4784 2759 Crs +5738 2830 Crs +5745 2838 Crs +6699 2773 Crs +6707 2789 Crs +6714 2771 Crs 6579 4346 Crs % End plot #2 % Begin plot #3 @@ -1100,58 +1100,58 @@ LTb 0.34 0.71 0.91 C 6380 4206 M 399 0 V -1171 3743 M -7 112 V -481 86 V -8 -53 V -225 -26 V -7 81 V -8 -27 V -466 20 V -7 10 V -278 28 V -195 -16 V -8 0 V -7 4 V -466 12 V +1171 3798 M +7 73 V +481 75 V +8 -21 V +225 -27 V +7 46 V +8 -18 V +466 10 V +7 58 V +278 -17 V +195 -13 V +8 25 V +7 -25 V +466 11 V 8 35 V 7 -16 V 721 11 V -466 -55 V -7 74 V -233 -53 V -8 41 V -954 -22 V -7 22 V -954 -14 V -8 -64 V -7 17 V -1171 3743 Star -1178 3855 Star -1659 3941 Star -1667 3888 Star -1892 3862 Star -1899 3943 Star -1907 3916 Star +466 0 V +7 29 V +233 -46 V +8 37 V +954 -7 V +7 21 V +954 -17 V +8 24 V +7 -26 V +1171 3798 Star +1178 3871 Star +1659 3946 Star +1667 3925 Star +1892 3898 Star +1899 3944 Star +1907 3926 Star 2373 3936 Star -2380 3946 Star -2658 3974 Star -2853 3958 Star -2861 3958 Star -2868 3962 Star -3334 3974 Star -3342 4009 Star -3349 3993 Star -4070 4004 Star -4536 3949 Star -4543 4023 Star -4776 3970 Star -4784 4011 Star -5738 3989 Star -5745 4011 Star -6699 3997 Star -6707 3933 Star -6714 3950 Star +2380 3994 Star +2658 3977 Star +2853 3964 Star +2861 3989 Star +2868 3964 Star +3334 3975 Star +3342 4010 Star +3349 3994 Star +4070 4005 Star +4536 4005 Star +4543 4034 Star +4776 3988 Star +4784 4025 Star +5738 4018 Star +5745 4039 Star +6699 4022 Star +6707 4046 Star +6714 4020 Star 6579 4206 Star % End plot #3 2.000 UL diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data index a0f2fb5..fa37d0f 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data @@ -1,29 +1,29 @@ #Description: Naive, three-loop dgemm. -Size: 31 Mflop/s: 2393.33 Percentage: 6.50 -Size: 32 Mflop/s: 2400.13 Percentage: 6.52 -Size: 96 Mflop/s: 1998.74 Percentage: 5.43 -Size: 97 Mflop/s: 1996.01 Percentage: 5.42 -Size: 127 Mflop/s: 1923.81 Percentage: 5.23 -Size: 128 Mflop/s: 1731.98 Percentage: 4.71 -Size: 129 Mflop/s: 1903.31 Percentage: 5.17 -Size: 191 Mflop/s: 1736.78 Percentage: 4.72 -Size: 192 Mflop/s: 1694.44 Percentage: 4.60 -Size: 229 Mflop/s: 1715.1 Percentage: 4.66 -Size: 255 Mflop/s: 1720.39 Percentage: 4.67 -Size: 256 Mflop/s: 777.65 Percentage: 2.11 -Size: 257 Mflop/s: 1729.27 Percentage: 4.70 -Size: 319 Mflop/s: 1704.8 Percentage: 4.63 -Size: 320 Mflop/s: 1414.84 Percentage: 3.84 -Size: 321 Mflop/s: 1741.3 Percentage: 4.73 -Size: 417 Mflop/s: 1733 Percentage: 4.71 -Size: 479 Mflop/s: 1731.17 Percentage: 4.70 -Size: 480 Mflop/s: 1678.77 Percentage: 4.56 -Size: 511 Mflop/s: 1733.6 Percentage: 4.71 -Size: 512 Mflop/s: 782.96 Percentage: 2.13 -Size: 639 Mflop/s: 1714.42 Percentage: 4.66 -Size: 640 Mflop/s: 663.418 Percentage: 1.80 -Size: 767 Mflop/s: 1690.82 Percentage: 4.59 -Size: 768 Mflop/s: 792.043 Percentage: 2.15 -Size: 769 Mflop/s: 1696.95 Percentage: 4.61 -#Average percentage of Peak = 4.47314 +Size: 31 Mflop/s: 3140.45 Percentage: 8.53 +Size: 32 Mflop/s: 3364.78 Percentage: 9.14 +Size: 96 Mflop/s: 2703.08 Percentage: 7.35 +Size: 97 Mflop/s: 2729.68 Percentage: 7.42 +Size: 127 Mflop/s: 2556.58 Percentage: 6.95 +Size: 128 Mflop/s: 1803.41 Percentage: 4.90 +Size: 129 Mflop/s: 2669.26 Percentage: 7.25 +Size: 191 Mflop/s: 2290.09 Percentage: 6.22 +Size: 192 Mflop/s: 1801.66 Percentage: 4.90 +Size: 229 Mflop/s: 2218.61 Percentage: 6.03 +Size: 255 Mflop/s: 2178.15 Percentage: 5.92 +Size: 256 Mflop/s: 808.413 Percentage: 2.20 +Size: 257 Mflop/s: 2238.93 Percentage: 6.08 +Size: 319 Mflop/s: 2174.45 Percentage: 5.91 +Size: 320 Mflop/s: 1612.13 Percentage: 4.38 +Size: 321 Mflop/s: 2173.64 Percentage: 5.91 +Size: 417 Mflop/s: 2125.36 Percentage: 5.78 +Size: 479 Mflop/s: 2107.13 Percentage: 5.73 +Size: 480 Mflop/s: 1848.43 Percentage: 5.02 +Size: 511 Mflop/s: 2112.99 Percentage: 5.74 +Size: 512 Mflop/s: 801.127 Percentage: 2.18 +Size: 639 Mflop/s: 1881.94 Percentage: 5.11 +Size: 640 Mflop/s: 815.847 Percentage: 2.22 +Size: 767 Mflop/s: 1825.75 Percentage: 4.96 +Size: 768 Mflop/s: 812.933 Percentage: 2.21 +Size: 769 Mflop/s: 1825.38 Percentage: 4.96 +#Average percentage of Peak = 5.4996 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data index ac799da..88570ba 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data @@ -1,29 +1,29 @@ #Description: Reference dgemm. -Size: 31 Mflop/s: 23449.2 Percentage: 63.72 -Size: 32 Mflop/s: 28198.9 Percentage: 76.63 -Size: 96 Mflop/s: 32542.3 Percentage: 88.43 -Size: 97 Mflop/s: 29801.3 Percentage: 80.98 -Size: 127 Mflop/s: 28557.8 Percentage: 77.60 -Size: 128 Mflop/s: 32643.3 Percentage: 88.70 -Size: 129 Mflop/s: 31198.2 Percentage: 84.78 -Size: 191 Mflop/s: 32247.3 Percentage: 87.63 -Size: 192 Mflop/s: 32830.6 Percentage: 89.21 -Size: 229 Mflop/s: 34360.9 Percentage: 93.37 -Size: 255 Mflop/s: 33477.7 Percentage: 90.97 -Size: 256 Mflop/s: 33473.9 Percentage: 90.96 -Size: 257 Mflop/s: 33686.5 Percentage: 91.54 -Size: 319 Mflop/s: 34335.2 Percentage: 93.30 -Size: 320 Mflop/s: 36438.1 Percentage: 99.02 -Size: 321 Mflop/s: 35433.7 Percentage: 96.29 -Size: 417 Mflop/s: 36133.7 Percentage: 98.19 -Size: 479 Mflop/s: 32951.4 Percentage: 89.54 -Size: 480 Mflop/s: 37260 Percentage:101.25 -Size: 511 Mflop/s: 34128 Percentage: 92.74 -Size: 512 Mflop/s: 36526.4 Percentage: 99.26 -Size: 639 Mflop/s: 35249.2 Percentage: 95.79 -Size: 640 Mflop/s: 36538.7 Percentage: 99.29 -Size: 767 Mflop/s: 35718.5 Percentage: 97.06 -Size: 768 Mflop/s: 32116.8 Percentage: 87.27 -Size: 769 Mflop/s: 33033.9 Percentage: 89.77 -#Average percentage of Peak = 90.1266 +Size: 31 Mflop/s: 25677.4 Percentage: 69.78 +Size: 32 Mflop/s: 28952.1 Percentage: 78.67 +Size: 96 Mflop/s: 32816.4 Percentage: 89.18 +Size: 97 Mflop/s: 31699.2 Percentage: 86.14 +Size: 127 Mflop/s: 30274.5 Percentage: 82.27 +Size: 128 Mflop/s: 32721.7 Percentage: 88.92 +Size: 129 Mflop/s: 31746.4 Percentage: 86.27 +Size: 191 Mflop/s: 32263.1 Percentage: 87.67 +Size: 192 Mflop/s: 35491.2 Percentage: 96.44 +Size: 229 Mflop/s: 34557.2 Percentage: 93.91 +Size: 255 Mflop/s: 33771.3 Percentage: 91.77 +Size: 256 Mflop/s: 35221.1 Percentage: 95.71 +Size: 257 Mflop/s: 33807.9 Percentage: 91.87 +Size: 319 Mflop/s: 34415.8 Percentage: 93.52 +Size: 320 Mflop/s: 36500.2 Percentage: 99.19 +Size: 321 Mflop/s: 35508.1 Percentage: 96.49 +Size: 417 Mflop/s: 36157.6 Percentage: 98.25 +Size: 479 Mflop/s: 36186.4 Percentage: 98.33 +Size: 480 Mflop/s: 37971.3 Percentage:103.18 +Size: 511 Mflop/s: 35144 Percentage: 95.50 +Size: 512 Mflop/s: 37362.5 Percentage:101.53 +Size: 639 Mflop/s: 36989.1 Percentage:100.51 +Size: 640 Mflop/s: 38267.8 Percentage:103.99 +Size: 767 Mflop/s: 37220.8 Percentage:101.14 +Size: 768 Mflop/s: 38744 Percentage:105.28 +Size: 769 Mflop/s: 37076.1 Percentage:100.75 +#Average percentage of Peak = 93.7023 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data index 6a720f3..110bee5 100644 --- a/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data @@ -1,29 +1,29 @@ #Description: Block-based dgemm. -Size: 31 Mflop/s: 2112.63 Percentage: 5.74 -Size: 32 Mflop/s: 2187.44 Percentage: 5.94 -Size: 96 Mflop/s: 2325.39 Percentage: 6.32 -Size: 97 Mflop/s: 2322.81 Percentage: 6.31 -Size: 127 Mflop/s: 2330.3 Percentage: 6.33 -Size: 128 Mflop/s: 2282.93 Percentage: 6.20 -Size: 129 Mflop/s: 2334.25 Percentage: 6.34 -Size: 191 Mflop/s: 2345.91 Percentage: 6.37 -Size: 192 Mflop/s: 2345.38 Percentage: 6.37 -Size: 229 Mflop/s: 2351.01 Percentage: 6.39 -Size: 255 Mflop/s: 2335.21 Percentage: 6.35 -Size: 256 Mflop/s: 2306.48 Percentage: 6.27 -Size: 257 Mflop/s: 2330.68 Percentage: 6.33 -Size: 319 Mflop/s: 2360.03 Percentage: 6.41 -Size: 320 Mflop/s: 2364.53 Percentage: 6.43 -Size: 321 Mflop/s: 2366.38 Percentage: 6.43 -Size: 417 Mflop/s: 2378.34 Percentage: 6.46 -Size: 479 Mflop/s: 2233.05 Percentage: 6.07 -Size: 480 Mflop/s: 2187.87 Percentage: 5.95 -Size: 511 Mflop/s: 2224.61 Percentage: 6.05 -Size: 512 Mflop/s: 2284.85 Percentage: 6.21 -Size: 639 Mflop/s: 2292.78 Percentage: 6.23 -Size: 640 Mflop/s: 2264.7 Percentage: 6.15 -Size: 767 Mflop/s: 2324.83 Percentage: 6.32 -Size: 768 Mflop/s: 2363.92 Percentage: 6.42 -Size: 769 Mflop/s: 2321.31 Percentage: 6.31 -#Average percentage of Peak = 6.25811 +Size: 31 Mflop/s: 3844.56 Percentage: 10.45 +Size: 32 Mflop/s: 5342.55 Percentage: 14.52 +Size: 96 Mflop/s: 5620.08 Percentage: 15.27 +Size: 97 Mflop/s: 4754.1 Percentage: 12.92 +Size: 127 Mflop/s: 4977.82 Percentage: 13.53 +Size: 128 Mflop/s: 4817.8 Percentage: 13.09 +Size: 129 Mflop/s: 4594.25 Percentage: 12.48 +Size: 191 Mflop/s: 4931.27 Percentage: 13.40 +Size: 192 Mflop/s: 5549.67 Percentage: 15.08 +Size: 229 Mflop/s: 4982.59 Percentage: 13.54 +Size: 255 Mflop/s: 4528.43 Percentage: 12.31 +Size: 256 Mflop/s: 4652.68 Percentage: 12.64 +Size: 257 Mflop/s: 4512.33 Percentage: 12.26 +Size: 319 Mflop/s: 5093.38 Percentage: 13.84 +Size: 320 Mflop/s: 5674.61 Percentage: 15.42 +Size: 321 Mflop/s: 5111.09 Percentage: 13.89 +Size: 417 Mflop/s: 5143.98 Percentage: 13.98 +Size: 479 Mflop/s: 5152.51 Percentage: 14.00 +Size: 480 Mflop/s: 5703 Percentage: 15.50 +Size: 511 Mflop/s: 4479.96 Percentage: 12.17 +Size: 512 Mflop/s: 4596.26 Percentage: 12.49 +Size: 639 Mflop/s: 5168.59 Percentage: 14.05 +Size: 640 Mflop/s: 5232.97 Percentage: 14.22 +Size: 767 Mflop/s: 4701.09 Percentage: 12.77 +Size: 768 Mflop/s: 4826.12 Percentage: 13.11 +Size: 769 Mflop/s: 4686.21 Percentage: 12.73 +#Average percentage of Peak = 13.4488