hw1: optimized blocked dgemm for matrix c
This commit is contained in:
parent
201e8c9927
commit
df1cb3cf01
7 changed files with 1978777 additions and 348 deletions
|
@ -88,7 +88,7 @@ and so on and so forth.
|
|||
Therefore, for \texttt{csize = 128} and \texttt{stride = 1} the array will
|
||||
access all indexes between 0 and 127 sequentially, and for \texttt{csize =
|
||||
$2^{20}$} and \texttt{stride = $2^{10}$} the benchmark will access index 0, then
|
||||
index $2^{10}-1$, and finally index $2^{20}-1$i.
|
||||
index $2^{10}-1$, and finally index $2^{20}-1$.
|
||||
|
||||
\subsection{Analyzing Benchmark Results}
|
||||
|
||||
|
|
1978702
Project1/project_1_maggioni_claudio/matmult/aaa.txt
Normal file
1978702
Project1/project_1_maggioni_claudio/matmult/aaa.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,5 @@
|
|||
#include <string.h>
|
||||
|
||||
/*
|
||||
Please include compiler name below (you may also include any other modules you would like to be loaded)
|
||||
|
||||
|
@ -16,43 +17,49 @@ LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKL
|
|||
|
||||
/* Short human-readable label for this dgemm implementation.
 * NOTE(review): presumably printed by the benchmark driver, which is not
 * visible here -- confirm against the driver source. */
const char* dgemm_desc = "Block-based dgemm.";
|
||||
|
||||
const int block_size = 50;
|
||||
/* Edge length, in elements, of the square tiles the matrices are split into.
 * It is also the column stride of the C_temp scratch tile, so column jj of
 * that tile starts at C_temp + jj * block_size. */
const int block_size = 18;
|
||||
|
||||
/* Returns the smaller of a and b (either one when they are equal).
 *
 * Declared static: a plain `inline` function in C99/C11 provides no external
 * definition, so any call the compiler chooses not to inline (for example at
 * -O0) would otherwise fail at link time. */
static inline int min(int a, int b) {
  return a < b ? a : b;
}
|
||||
|
||||
inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, double* A, double* B, double* C) {
|
||||
/* For each row i of A */
|
||||
for (int i = r_min; i < r_max; ++i) {
|
||||
|
||||
/* For each column j of B */
|
||||
for (int j = c_min; j < c_max; ++j) {
|
||||
|
||||
for(int k = k_min; k < k_max; k++) {
|
||||
C[i + j * n] += A[i + k * n] * B[k + j * n];
|
||||
inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, double* A, double* B, double* C_temp) {
|
||||
for (int i = r_min, ii = 0; i < r_max; ++i, ++ii) {
|
||||
for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) {
|
||||
for (int k = k_min; k < k_max; k++) {
|
||||
C_temp[ii + jj * block_size] += A[i + k * n] * B[k + j * n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void store_c(double* C, double* C_temp, int r_min, int r_max, int c_min, int c_max, int n) {
|
||||
for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) {
|
||||
memcpy(C + j * n + r_min, C_temp + jj * block_size, (r_max - r_min) * sizeof(double));
|
||||
}
|
||||
}
|
||||
|
||||
/* This routine performs a dgemm operation
|
||||
* C := C + A * B
|
||||
* where A, B, and C are lda-by-lda matrices stored in column-major format.
|
||||
* On exit, A and B maintain their input values. */
|
||||
void square_dgemm(int n, double* A, double* B, double* C) {
|
||||
/* For each row i of A */
|
||||
double C_temp[block_size * block_size];
|
||||
|
||||
for (int i = 0; i < n; i += block_size) {
|
||||
int i_next = min(i + block_size, n);
|
||||
|
||||
/* For each column j of B */
|
||||
for (int j = 0; j < n; j += block_size) {
|
||||
int j_next = min(j + block_size, n);
|
||||
|
||||
memset(C_temp, 0, block_size * block_size * sizeof(double));
|
||||
|
||||
for (int k = 0; k < n; k += block_size) {
|
||||
int k_next = min(k + block_size, n);
|
||||
naivemm(i, i_next, k, k_next, j, j_next, n, A, B, C);
|
||||
naivemm(i, i_next, k, k_next, j, j_next, n, A, B, C_temp);
|
||||
}
|
||||
|
||||
store_c(C, C_temp, i, i_next, j, j_next, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
%!PS-Adobe-2.0
|
||||
%%Title: timing.ps
|
||||
%%Creator: gnuplot 5.2 patchlevel 8
|
||||
%%CreationDate: Wed Sep 28 13:13:45 2022
|
||||
%%CreationDate: Wed Sep 28 17:53:39 2022
|
||||
%%DocumentFonts: (atend)
|
||||
%%BoundingBox: 50 50 554 770
|
||||
%%Orientation: Landscape
|
||||
|
@ -483,7 +483,7 @@ SDict begin [
|
|||
/Creator (gnuplot 5.2 patchlevel 8)
|
||||
% /Producer (gnuplot)
|
||||
% /Keywords ()
|
||||
/CreationDate (Wed Sep 28 13:13:45 2022)
|
||||
/CreationDate (Wed Sep 28 17:53:39 2022)
|
||||
/DOCINFO pdfmark
|
||||
end
|
||||
} ifelse
|
||||
|
@ -545,34 +545,18 @@ LCb setrgbcolor
|
|||
-63 0 V
|
||||
stroke
|
||||
854 448 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 100)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 1000)]
|
||||
] -46.7 MRshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
/vshift -46 def
|
||||
1.000 UL
|
||||
LTb
|
||||
LCb setrgbcolor
|
||||
938 867 M
|
||||
938 1076 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1111 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1285 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1420 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1530 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1623 M
|
||||
938 1443 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
|
@ -580,7 +564,23 @@ LCb setrgbcolor
|
|||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 1775 M
|
||||
938 1906 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2071 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2210 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2331 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2438 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
|
@ -588,107 +588,53 @@ stroke
|
|||
0.500 UL
|
||||
LTa
|
||||
LCa setrgbcolor
|
||||
938 1838 M
|
||||
938 2534 M
|
||||
6009 0 V
|
||||
stroke
|
||||
1.000 UL
|
||||
LTb
|
||||
LCb setrgbcolor
|
||||
938 1838 M
|
||||
938 2534 M
|
||||
63 0 V
|
||||
5946 0 R
|
||||
-63 0 V
|
||||
stroke
|
||||
854 1838 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 1000)]
|
||||
] -46.7 MRshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
LTb
|
||||
LCb setrgbcolor
|
||||
938 2257 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2502 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2675 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2810 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 2920 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 3013 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 3094 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 3165 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
stroke
|
||||
0.500 UL
|
||||
LTa
|
||||
LCa setrgbcolor
|
||||
938 3229 M
|
||||
6009 0 V
|
||||
stroke
|
||||
1.000 UL
|
||||
LTb
|
||||
LCb setrgbcolor
|
||||
938 3229 M
|
||||
63 0 V
|
||||
5946 0 R
|
||||
-63 0 V
|
||||
stroke
|
||||
854 3229 M
|
||||
854 2534 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 10000)]
|
||||
] -46.7 MRshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
LTb
|
||||
LCb setrgbcolor
|
||||
938 3647 M
|
||||
938 3161 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 3892 M
|
||||
938 3529 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4066 M
|
||||
938 3789 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4200 M
|
||||
938 3991 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4311 M
|
||||
938 4156 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4404 M
|
||||
938 4296 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4484 M
|
||||
938 4417 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
938 4555 M
|
||||
938 4524 M
|
||||
31 0 V
|
||||
5978 0 R
|
||||
-31 0 V
|
||||
|
@ -729,7 +675,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
938 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 0)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 30.6)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -750,7 +696,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
1689 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 100)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 30.7)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -771,7 +717,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
2440 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 200)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 30.8)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -792,7 +738,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
3191 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 300)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 30.9)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -813,7 +759,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
3943 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 400)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 31)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -834,7 +780,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
4694 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 500)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 31.1)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -857,7 +803,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
5445 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 600)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 31.2)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -880,7 +826,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
6196 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 700)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 31.3)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -901,7 +847,7 @@ LCb setrgbcolor
|
|||
0 -63 V
|
||||
stroke
|
||||
6947 308 M
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 800)]
|
||||
[ [(Helvetica) 140.0 0.0 true true 0 ( 31.4)]
|
||||
] -46.7 MCshow
|
||||
/Helvetica findfont 140 scalefont setfont
|
||||
1.000 UL
|
||||
|
@ -960,58 +906,8 @@ LTb
|
|||
0.58 0.00 0.83 C
|
||||
6380 4486 M
|
||||
399 0 V
|
||||
1171 2295 M
|
||||
7 69 V
|
||||
481 -156 V
|
||||
8 46 V
|
||||
225 -35 V
|
||||
7 -150 V
|
||||
8 86 V
|
||||
466 20 V
|
||||
7 -109 V
|
||||
278 98 V
|
||||
195 2 V
|
||||
8 -631 V
|
||||
7 636 V
|
||||
466 -5 V
|
||||
8 -77 V
|
||||
7 77 V
|
||||
721 -1 V
|
||||
466 1 V
|
||||
7 -46 V
|
||||
233 45 V
|
||||
8 -654 V
|
||||
954 640 V
|
||||
7 -487 V
|
||||
954 308 V
|
||||
8 -337 V
|
||||
7 488 V
|
||||
1171 2295 Pls
|
||||
1178 2364 Pls
|
||||
1659 2208 Pls
|
||||
1667 2254 Pls
|
||||
1892 2219 Pls
|
||||
1899 2069 Pls
|
||||
1907 2155 Pls
|
||||
2373 2175 Pls
|
||||
2380 2066 Pls
|
||||
2658 2164 Pls
|
||||
2853 2166 Pls
|
||||
2861 1535 Pls
|
||||
2868 2171 Pls
|
||||
3334 2166 Pls
|
||||
3342 2089 Pls
|
||||
3349 2166 Pls
|
||||
4070 2165 Pls
|
||||
4536 2166 Pls
|
||||
4543 2120 Pls
|
||||
4776 2165 Pls
|
||||
4784 1511 Pls
|
||||
5738 2151 Pls
|
||||
5745 1664 Pls
|
||||
6699 1972 Pls
|
||||
6707 1635 Pls
|
||||
6714 2123 Pls
|
||||
3942 1253 M
|
||||
3942 1253 Pls
|
||||
6579 4486 Pls
|
||||
% End plot #1
|
||||
% Begin plot #2
|
||||
|
@ -1030,58 +926,8 @@ LTb
|
|||
0.00 0.62 0.45 C
|
||||
6380 4346 M
|
||||
399 0 V
|
||||
1171 2175 M
|
||||
7 -15 V
|
||||
481 8 V
|
||||
8 0 V
|
||||
225 -3 V
|
||||
7 -82 V
|
||||
8 64 V
|
||||
466 -5 V
|
||||
7 -54 V
|
||||
278 40 V
|
||||
195 -6 V
|
||||
8 -446 V
|
||||
7 445 V
|
||||
466 8 V
|
||||
8 -121 V
|
||||
7 108 V
|
||||
721 14 V
|
||||
466 -4 V
|
||||
7 -36 V
|
||||
233 16 V
|
||||
8 -553 V
|
||||
954 567 V
|
||||
7 -549 V
|
||||
954 539 V
|
||||
8 -424 V
|
||||
7 409 V
|
||||
1171 2175 Crs
|
||||
1178 2160 Crs
|
||||
1659 2168 Crs
|
||||
1667 2168 Crs
|
||||
1892 2165 Crs
|
||||
1899 2083 Crs
|
||||
1907 2147 Crs
|
||||
2373 2142 Crs
|
||||
2380 2088 Crs
|
||||
2658 2128 Crs
|
||||
2853 2122 Crs
|
||||
2861 1676 Crs
|
||||
2868 2121 Crs
|
||||
3334 2129 Crs
|
||||
3342 2008 Crs
|
||||
3349 2116 Crs
|
||||
4070 2130 Crs
|
||||
4536 2126 Crs
|
||||
4543 2090 Crs
|
||||
4776 2106 Crs
|
||||
4784 1553 Crs
|
||||
5738 2120 Crs
|
||||
5745 1571 Crs
|
||||
6699 2110 Crs
|
||||
6707 1686 Crs
|
||||
6714 2095 Crs
|
||||
3942 1205 M
|
||||
3942 1205 Crs
|
||||
6579 4346 Crs
|
||||
% End plot #2
|
||||
% Begin plot #3
|
||||
|
@ -1100,58 +946,8 @@ LTb
|
|||
0.34 0.71 0.91 C
|
||||
6380 4206 M
|
||||
399 0 V
|
||||
1171 3733 M
|
||||
7 79 V
|
||||
481 134 V
|
||||
8 -28 V
|
||||
225 -40 V
|
||||
7 64 V
|
||||
8 -28 V
|
||||
466 9 V
|
||||
7 66 V
|
||||
278 -17 V
|
||||
195 -28 V
|
||||
8 33 V
|
||||
7 -30 V
|
||||
466 2 V
|
||||
8 59 V
|
||||
7 -32 V
|
||||
721 -6 V
|
||||
466 17 V
|
||||
7 9 V
|
||||
233 -33 V
|
||||
8 38 V
|
||||
954 -23 V
|
||||
7 12 V
|
||||
954 8 V
|
||||
8 23 V
|
||||
7 -52 V
|
||||
1171 3733 Star
|
||||
1178 3812 Star
|
||||
1659 3946 Star
|
||||
1667 3918 Star
|
||||
1892 3878 Star
|
||||
1899 3942 Star
|
||||
1907 3914 Star
|
||||
2373 3923 Star
|
||||
2380 3989 Star
|
||||
2658 3972 Star
|
||||
2853 3944 Star
|
||||
2861 3977 Star
|
||||
2868 3947 Star
|
||||
3334 3949 Star
|
||||
3342 4008 Star
|
||||
3349 3976 Star
|
||||
4070 3970 Star
|
||||
4536 3987 Star
|
||||
4543 3996 Star
|
||||
4776 3963 Star
|
||||
4784 4001 Star
|
||||
5738 3978 Star
|
||||
5745 3990 Star
|
||||
6699 3998 Star
|
||||
6707 4021 Star
|
||||
6714 3969 Star
|
||||
3942 3120 M
|
||||
3942 3120 Star
|
||||
6579 4206 Star
|
||||
% End plot #3
|
||||
2.000 UL
|
||||
|
|
|
@ -1,29 +1,4 @@
|
|||
#Description: Naive, three-loop dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 2131.35 Percentage: 5.79
|
||||
Size: 32 Mflop/s: 2387.28 Percentage: 6.49
|
||||
Size: 96 Mflop/s: 1844.52 Percentage: 5.01
|
||||
Size: 97 Mflop/s: 1991.74 Percentage: 5.41
|
||||
Size: 127 Mflop/s: 1878.09 Percentage: 5.10
|
||||
Size: 128 Mflop/s: 1466.11 Percentage: 3.98
|
||||
Size: 129 Mflop/s: 1688.41 Percentage: 4.59
|
||||
Size: 191 Mflop/s: 1747.28 Percentage: 4.75
|
||||
Size: 192 Mflop/s: 1458.67 Percentage: 3.96
|
||||
Size: 229 Mflop/s: 1714.53 Percentage: 4.66
|
||||
Size: 255 Mflop/s: 1719.28 Percentage: 4.67
|
||||
Size: 256 Mflop/s: 604.682 Percentage: 1.64
|
||||
Size: 257 Mflop/s: 1733.51 Percentage: 4.71
|
||||
Size: 319 Mflop/s: 1720.89 Percentage: 4.68
|
||||
Size: 320 Mflop/s: 1514.24 Percentage: 4.11
|
||||
Size: 321 Mflop/s: 1721.2 Percentage: 4.68
|
||||
Size: 417 Mflop/s: 1718.17 Percentage: 4.67
|
||||
Size: 479 Mflop/s: 1719.18 Percentage: 4.67
|
||||
Size: 480 Mflop/s: 1594.88 Percentage: 4.33
|
||||
Size: 511 Mflop/s: 1716.8 Percentage: 4.67
|
||||
Size: 512 Mflop/s: 581.233 Percentage: 1.58
|
||||
Size: 639 Mflop/s: 1678.33 Percentage: 4.56
|
||||
Size: 640 Mflop/s: 749.008 Percentage: 2.04
|
||||
Size: 767 Mflop/s: 1247.59 Percentage: 3.39
|
||||
Size: 768 Mflop/s: 714.52 Percentage: 1.94
|
||||
Size: 769 Mflop/s: 1603.09 Percentage: 4.36
|
||||
#Average percentage of Peak = 4.24797
|
||||
Size: 31 Mflop/s: 2431.2 Percentage: 6.61
|
||||
#Average percentage of Peak = 6.60652
|
||||
|
|
|
@ -1,29 +1,4 @@
|
|||
#Description: Reference dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 23035.3 Percentage: 62.60
|
||||
Size: 32 Mflop/s: 26290.9 Percentage: 71.44
|
||||
Size: 96 Mflop/s: 32829.1 Percentage: 89.21
|
||||
Size: 97 Mflop/s: 31312.6 Percentage: 85.09
|
||||
Size: 127 Mflop/s: 29329 Percentage: 79.70
|
||||
Size: 128 Mflop/s: 32578.6 Percentage: 88.53
|
||||
Size: 129 Mflop/s: 31113.1 Percentage: 84.55
|
||||
Size: 191 Mflop/s: 31590.5 Percentage: 85.84
|
||||
Size: 192 Mflop/s: 35219.4 Percentage: 95.70
|
||||
Size: 229 Mflop/s: 34236 Percentage: 93.03
|
||||
Size: 255 Mflop/s: 32692.5 Percentage: 88.84
|
||||
Size: 256 Mflop/s: 34510 Percentage: 93.78
|
||||
Size: 257 Mflop/s: 32844.7 Percentage: 89.25
|
||||
Size: 319 Mflop/s: 32950.9 Percentage: 89.54
|
||||
Size: 320 Mflop/s: 36332.4 Percentage: 98.73
|
||||
Size: 321 Mflop/s: 34460.3 Percentage: 93.64
|
||||
Size: 417 Mflop/s: 34136 Percentage: 92.76
|
||||
Size: 479 Mflop/s: 35101.8 Percentage: 95.39
|
||||
Size: 480 Mflop/s: 35608.8 Percentage: 96.76
|
||||
Size: 511 Mflop/s: 33768.6 Percentage: 91.76
|
||||
Size: 512 Mflop/s: 35947 Percentage: 97.68
|
||||
Size: 639 Mflop/s: 34572.5 Percentage: 93.95
|
||||
Size: 640 Mflop/s: 35268.1 Percentage: 95.84
|
||||
Size: 767 Mflop/s: 35731.4 Percentage: 97.10
|
||||
Size: 768 Mflop/s: 37114.6 Percentage:100.85
|
||||
Size: 769 Mflop/s: 34093.6 Percentage: 92.65
|
||||
#Average percentage of Peak = 90.1618
|
||||
Size: 31 Mflop/s: 19099.4 Percentage: 51.90
|
||||
#Average percentage of Peak = 51.9005
|
||||
|
|
|
@ -1,29 +1,3 @@
|
|||
#Description: Naive, three-loop dgemm.
|
||||
#Description: Block-based dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 1065.56 Percentage: 2.90
|
||||
Size: 32 Mflop/s: 1703.76 Percentage: 4.63
|
||||
Size: 96 Mflop/s: 1730.73 Percentage: 4.70
|
||||
Size: 97 Mflop/s: 1728.48 Percentage: 4.70
|
||||
Size: 127 Mflop/s: 1718.52 Percentage: 4.67
|
||||
Size: 128 Mflop/s: 1533.64 Percentage: 4.17
|
||||
Size: 129 Mflop/s: 1724.17 Percentage: 4.69
|
||||
Size: 191 Mflop/s: 1636.9 Percentage: 4.45
|
||||
Size: 192 Mflop/s: 1534.75 Percentage: 4.17
|
||||
Size: 229 Mflop/s: 1604.48 Percentage: 4.36
|
||||
Size: 255 Mflop/s: 1462.11 Percentage: 3.97
|
||||
Size: 256 Mflop/s: 730.562 Percentage: 1.99
|
||||
Size: 257 Mflop/s: 1483.12 Percentage: 4.03
|
||||
Size: 319 Mflop/s: 1409.3 Percentage: 3.83
|
||||
Size: 320 Mflop/s: 1303.95 Percentage: 3.54
|
||||
Size: 321 Mflop/s: 1621.34 Percentage: 4.41
|
||||
Size: 417 Mflop/s: 1496.69 Percentage: 4.07
|
||||
Size: 479 Mflop/s: 1518.7 Percentage: 4.13
|
||||
Size: 480 Mflop/s: 1429.18 Percentage: 3.88
|
||||
Size: 511 Mflop/s: 1371.7 Percentage: 3.73
|
||||
Size: 512 Mflop/s: 602.424 Percentage: 1.64
|
||||
Size: 639 Mflop/s: 1339.03 Percentage: 3.64
|
||||
Size: 640 Mflop/s: 913.949 Percentage: 2.48
|
||||
Size: 767 Mflop/s: 1566.19 Percentage: 4.26
|
||||
Size: 768 Mflop/s: 757.52 Percentage: 2.06
|
||||
Size: 769 Mflop/s: 1559.49 Percentage: 4.24
|
||||
#Average percentage of Peak = 3.81963
|
||||
Size: 31 Mflop/s: 2306.44 Percentage: 6.27
|
||||
|
|
Reference in a new issue