hw1: compiler flags
This commit is contained in:
parent
abbd0275ec
commit
980eb5f0b9
6 changed files with 259 additions and 263 deletions
|
@ -3,8 +3,8 @@
|
|||
#
|
||||
|
||||
CC = gcc
|
||||
OPT = -O2
|
||||
CFLAGS = -Wall -std=gnu99 $(OPT)
|
||||
OPT = -O3
|
||||
CFLAGS = -Wall -std=gnu99 -march=haswell -ffast-math $(OPT)
|
||||
LDFLAGS = -Wall
|
||||
# librt is needed for clock_gettime
|
||||
LDLIBS = -lrt -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm -ldl -m64 -I${MKLROOT}/include
|
||||
|
|
|
@ -15,29 +15,11 @@ LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKL
|
|||
|
||||
*/
|
||||
|
||||
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||
|
||||
const char* dgemm_desc = "Block-based dgemm.";
|
||||
|
||||
const int block_size = 26;
|
||||
|
||||
/*
 * Return the smaller of two ints; when they compare equal, b is returned.
 *
 * Declared `static inline` rather than plain `inline`: under C99/gnu99
 * semantics a plain `inline` definition does not emit an external symbol,
 * so any call the compiler chooses not to inline (e.g. at -O0) would fail
 * to link with an undefined reference to `min`.
 */
static inline int min(int a, int b) {
    return a < b ? a : b;
}
|
||||
|
||||
inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, double* A_row, double* B, double* C_temp) {
|
||||
for (int i = r_min, ii = 0; i < r_max; ++i, ++ii) {
|
||||
for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) {
|
||||
for (int k = k_min; k < k_max; k++) {
|
||||
C_temp[ii + jj * block_size] += A_row[i * n + k] * B[k + j * n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void store_c(double* C, double* C_temp, int r_min, int r_max, int c_min, int c_max, int n) {
|
||||
for (int j = c_min, jj = 0; j < c_max; ++j, ++jj) {
|
||||
memcpy(C + j * n + r_min, C_temp + jj * block_size, (r_max - r_min) * sizeof(double));
|
||||
}
|
||||
}
|
||||
int block_size = 32;
|
||||
|
||||
/* This routine performs a dgemm operation
|
||||
* C := C + A * B
|
||||
|
@ -57,19 +39,33 @@ void square_dgemm(int n, double* A, double* B, double* C) {
|
|||
}
|
||||
|
||||
for (int i = 0; i < n; i += block_size) {
|
||||
int i_next = min(i + block_size, n);
|
||||
int i_next = MIN(i + block_size, n);
|
||||
|
||||
for (int j = 0; j < n; j += block_size) {
|
||||
int j_next = min(j + block_size, n);
|
||||
int j_next = MIN(j + block_size, n);
|
||||
|
||||
// clear matrix C_temp
|
||||
memset(C_temp, 0, block_size * block_size * sizeof(double));
|
||||
|
||||
for (int k = 0; k < n; k += block_size) {
|
||||
int k_next = min(k + block_size, n);
|
||||
naivemm(i, i_next, k, k_next, j, j_next, n, A_row, B, C_temp);
|
||||
int k_next = MIN(k + block_size, n);
|
||||
|
||||
// begin naivemm
|
||||
for (int i2 = i, ii2 = 0; i2 < i_next; ++i2, ++ii2) {
|
||||
for (int j2 = j, jj2 = 0; j2 < j_next; ++j2, ++jj2) {
|
||||
for (int k2 = k; k2 < k_next; k2++) {
|
||||
C_temp[ii2 + jj2 * block_size] += A_row[i2 * n + k2] * B[k2 + j2 * n];
|
||||
}
|
||||
}
|
||||
}
|
||||
// end naivemm
|
||||
}
|
||||
|
||||
store_c(C, C_temp, i, i_next, j, j_next, n);
|
||||
// store C_temp in C
|
||||
for (int j2 = j, jj2 = 0; j2 < j_next; ++j2, ++jj2) {
|
||||
memcpy(C + j2 * n + i, C_temp + jj2 * block_size, (i_next - i) * sizeof(double));
|
||||
}
|
||||
// end store C_temp
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
%!PS-Adobe-2.0
|
||||
%%Title: timing.ps
|
||||
%%Creator: gnuplot 5.2 patchlevel 8
|
||||
%%CreationDate: Mon Oct 3 21:44:11 2022
|
||||
%%CreationDate: Wed Oct 12 11:03:04 2022
|
||||
%%DocumentFonts: (atend)
|
||||
%%BoundingBox: 50 50 554 770
|
||||
%%Orientation: Landscape
|
||||
|
@ -483,7 +483,7 @@ SDict begin [
|
|||
/Creator (gnuplot 5.2 patchlevel 8)
|
||||
% /Producer (gnuplot)
|
||||
% /Keywords ()
|
||||
/CreationDate (Mon Oct 3 21:44:11 2022)
|
||||
/CreationDate (Wed Oct 12 11:03:04 2022)
|
||||
/DOCINFO pdfmark
|
||||
end
|
||||
} ifelse
|
||||
|
@ -960,58 +960,58 @@ LTb
|
|||
0.58 0.00 0.83 C
|
||||
6380 4486 M
|
||||
399 0 V
|
||||
1171 2365 M
|
||||
7 2 V
|
||||
481 -111 V
|
||||
8 0 V
|
||||
225 -23 V
|
||||
7 -63 V
|
||||
8 57 V
|
||||
466 -55 V
|
||||
7 -15 V
|
||||
278 7 V
|
||||
195 2 V
|
||||
8 -480 V
|
||||
7 483 V
|
||||
466 -9 V
|
||||
8 -112 V
|
||||
7 125 V
|
||||
721 -3 V
|
||||
466 0 V
|
||||
7 -19 V
|
||||
233 20 V
|
||||
8 -480 V
|
||||
954 473 V
|
||||
7 -573 V
|
||||
954 564 V
|
||||
8 -457 V
|
||||
7 460 V
|
||||
1171 2365 Pls
|
||||
1178 2367 Pls
|
||||
1659 2256 Pls
|
||||
1667 2256 Pls
|
||||
1892 2233 Pls
|
||||
1899 2170 Pls
|
||||
1907 2227 Pls
|
||||
2373 2172 Pls
|
||||
2380 2157 Pls
|
||||
2658 2164 Pls
|
||||
2853 2166 Pls
|
||||
2861 1686 Pls
|
||||
2868 2169 Pls
|
||||
3334 2160 Pls
|
||||
3342 2048 Pls
|
||||
3349 2173 Pls
|
||||
4070 2170 Pls
|
||||
4536 2170 Pls
|
||||
4543 2151 Pls
|
||||
4776 2171 Pls
|
||||
4784 1691 Pls
|
||||
5738 2164 Pls
|
||||
5745 1591 Pls
|
||||
6699 2155 Pls
|
||||
6707 1698 Pls
|
||||
6714 2158 Pls
|
||||
1171 2529 M
|
||||
7 42 V
|
||||
481 -132 V
|
||||
8 6 V
|
||||
225 -40 V
|
||||
7 -211 V
|
||||
8 237 V
|
||||
466 -92 V
|
||||
7 -145 V
|
||||
278 126 V
|
||||
195 -12 V
|
||||
8 -598 V
|
||||
7 615 V
|
||||
466 -18 V
|
||||
8 -180 V
|
||||
7 180 V
|
||||
721 -13 V
|
||||
466 -6 V
|
||||
7 -79 V
|
||||
233 81 V
|
||||
8 -586 V
|
||||
954 516 V
|
||||
7 -505 V
|
||||
954 487 V
|
||||
8 -489 V
|
||||
7 489 V
|
||||
1171 2529 Pls
|
||||
1178 2571 Pls
|
||||
1659 2439 Pls
|
||||
1667 2445 Pls
|
||||
1892 2405 Pls
|
||||
1899 2194 Pls
|
||||
1907 2431 Pls
|
||||
2373 2339 Pls
|
||||
2380 2194 Pls
|
||||
2658 2320 Pls
|
||||
2853 2308 Pls
|
||||
2861 1710 Pls
|
||||
2868 2325 Pls
|
||||
3334 2307 Pls
|
||||
3342 2127 Pls
|
||||
3349 2307 Pls
|
||||
4070 2294 Pls
|
||||
4536 2288 Pls
|
||||
4543 2209 Pls
|
||||
4776 2290 Pls
|
||||
4784 1704 Pls
|
||||
5738 2220 Pls
|
||||
5745 1715 Pls
|
||||
6699 2202 Pls
|
||||
6707 1713 Pls
|
||||
6714 2202 Pls
|
||||
6579 4486 Pls
|
||||
% End plot #1
|
||||
% Begin plot #2
|
||||
|
@ -1030,58 +1030,58 @@ LTb
|
|||
0.00 0.62 0.45 C
|
||||
6380 4346 M
|
||||
399 0 V
|
||||
1171 2290 M
|
||||
7 21 V
|
||||
481 37 V
|
||||
8 -1 V
|
||||
225 2 V
|
||||
7 -12 V
|
||||
8 13 V
|
||||
466 3 V
|
||||
7 0 V
|
||||
278 2 V
|
||||
195 -5 V
|
||||
8 -7 V
|
||||
7 6 V
|
||||
466 8 V
|
||||
8 1 V
|
||||
7 0 V
|
||||
721 3 V
|
||||
466 -38 V
|
||||
7 -12 V
|
||||
233 10 V
|
||||
1171 2651 M
|
||||
7 199 V
|
||||
481 31 V
|
||||
8 -101 V
|
||||
225 27 V
|
||||
7 -19 V
|
||||
8 -29 V
|
||||
466 43 V
|
||||
7 71 V
|
||||
278 -65 V
|
||||
195 -58 V
|
||||
8 17 V
|
||||
7 -19 V
|
||||
466 73 V
|
||||
8 66 V
|
||||
7 -64 V
|
||||
721 4 V
|
||||
466 1 V
|
||||
7 62 V
|
||||
233 -146 V
|
||||
8 15 V
|
||||
954 71 V
|
||||
7 8 V
|
||||
954 -65 V
|
||||
8 16 V
|
||||
954 2 V
|
||||
7 -7 V
|
||||
954 16 V
|
||||
8 10 V
|
||||
7 -11 V
|
||||
1171 2290 Crs
|
||||
1178 2311 Crs
|
||||
1659 2348 Crs
|
||||
1667 2347 Crs
|
||||
1892 2349 Crs
|
||||
1899 2337 Crs
|
||||
1907 2350 Crs
|
||||
2373 2353 Crs
|
||||
2380 2353 Crs
|
||||
2658 2355 Crs
|
||||
2853 2350 Crs
|
||||
2861 2343 Crs
|
||||
2868 2349 Crs
|
||||
3334 2357 Crs
|
||||
3342 2358 Crs
|
||||
3349 2358 Crs
|
||||
4070 2361 Crs
|
||||
4536 2323 Crs
|
||||
4543 2311 Crs
|
||||
4776 2321 Crs
|
||||
4784 2337 Crs
|
||||
5738 2339 Crs
|
||||
5745 2332 Crs
|
||||
6699 2348 Crs
|
||||
6707 2358 Crs
|
||||
6714 2347 Crs
|
||||
7 -18 V
|
||||
1171 2651 Crs
|
||||
1178 2850 Crs
|
||||
1659 2881 Crs
|
||||
1667 2780 Crs
|
||||
1892 2807 Crs
|
||||
1899 2788 Crs
|
||||
1907 2759 Crs
|
||||
2373 2802 Crs
|
||||
2380 2873 Crs
|
||||
2658 2808 Crs
|
||||
2853 2750 Crs
|
||||
2861 2767 Crs
|
||||
2868 2748 Crs
|
||||
3334 2821 Crs
|
||||
3342 2887 Crs
|
||||
3349 2823 Crs
|
||||
4070 2827 Crs
|
||||
4536 2828 Crs
|
||||
4543 2890 Crs
|
||||
4776 2744 Crs
|
||||
4784 2759 Crs
|
||||
5738 2830 Crs
|
||||
5745 2838 Crs
|
||||
6699 2773 Crs
|
||||
6707 2789 Crs
|
||||
6714 2771 Crs
|
||||
6579 4346 Crs
|
||||
% End plot #2
|
||||
% Begin plot #3
|
||||
|
@ -1100,58 +1100,58 @@ LTb
|
|||
0.34 0.71 0.91 C
|
||||
6380 4206 M
|
||||
399 0 V
|
||||
1171 3743 M
|
||||
7 112 V
|
||||
481 86 V
|
||||
8 -53 V
|
||||
225 -26 V
|
||||
7 81 V
|
||||
8 -27 V
|
||||
466 20 V
|
||||
7 10 V
|
||||
278 28 V
|
||||
195 -16 V
|
||||
8 0 V
|
||||
7 4 V
|
||||
466 12 V
|
||||
1171 3798 M
|
||||
7 73 V
|
||||
481 75 V
|
||||
8 -21 V
|
||||
225 -27 V
|
||||
7 46 V
|
||||
8 -18 V
|
||||
466 10 V
|
||||
7 58 V
|
||||
278 -17 V
|
||||
195 -13 V
|
||||
8 25 V
|
||||
7 -25 V
|
||||
466 11 V
|
||||
8 35 V
|
||||
7 -16 V
|
||||
721 11 V
|
||||
466 -55 V
|
||||
7 74 V
|
||||
233 -53 V
|
||||
8 41 V
|
||||
954 -22 V
|
||||
7 22 V
|
||||
954 -14 V
|
||||
8 -64 V
|
||||
7 17 V
|
||||
1171 3743 Star
|
||||
1178 3855 Star
|
||||
1659 3941 Star
|
||||
1667 3888 Star
|
||||
1892 3862 Star
|
||||
1899 3943 Star
|
||||
1907 3916 Star
|
||||
466 0 V
|
||||
7 29 V
|
||||
233 -46 V
|
||||
8 37 V
|
||||
954 -7 V
|
||||
7 21 V
|
||||
954 -17 V
|
||||
8 24 V
|
||||
7 -26 V
|
||||
1171 3798 Star
|
||||
1178 3871 Star
|
||||
1659 3946 Star
|
||||
1667 3925 Star
|
||||
1892 3898 Star
|
||||
1899 3944 Star
|
||||
1907 3926 Star
|
||||
2373 3936 Star
|
||||
2380 3946 Star
|
||||
2658 3974 Star
|
||||
2853 3958 Star
|
||||
2861 3958 Star
|
||||
2868 3962 Star
|
||||
3334 3974 Star
|
||||
3342 4009 Star
|
||||
3349 3993 Star
|
||||
4070 4004 Star
|
||||
4536 3949 Star
|
||||
4543 4023 Star
|
||||
4776 3970 Star
|
||||
4784 4011 Star
|
||||
5738 3989 Star
|
||||
5745 4011 Star
|
||||
6699 3997 Star
|
||||
6707 3933 Star
|
||||
6714 3950 Star
|
||||
2380 3994 Star
|
||||
2658 3977 Star
|
||||
2853 3964 Star
|
||||
2861 3989 Star
|
||||
2868 3964 Star
|
||||
3334 3975 Star
|
||||
3342 4010 Star
|
||||
3349 3994 Star
|
||||
4070 4005 Star
|
||||
4536 4005 Star
|
||||
4543 4034 Star
|
||||
4776 3988 Star
|
||||
4784 4025 Star
|
||||
5738 4018 Star
|
||||
5745 4039 Star
|
||||
6699 4022 Star
|
||||
6707 4046 Star
|
||||
6714 4020 Star
|
||||
6579 4206 Star
|
||||
% End plot #3
|
||||
2.000 UL
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
#Description: Naive, three-loop dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 2393.33 Percentage: 6.50
|
||||
Size: 32 Mflop/s: 2400.13 Percentage: 6.52
|
||||
Size: 96 Mflop/s: 1998.74 Percentage: 5.43
|
||||
Size: 97 Mflop/s: 1996.01 Percentage: 5.42
|
||||
Size: 127 Mflop/s: 1923.81 Percentage: 5.23
|
||||
Size: 128 Mflop/s: 1731.98 Percentage: 4.71
|
||||
Size: 129 Mflop/s: 1903.31 Percentage: 5.17
|
||||
Size: 191 Mflop/s: 1736.78 Percentage: 4.72
|
||||
Size: 192 Mflop/s: 1694.44 Percentage: 4.60
|
||||
Size: 229 Mflop/s: 1715.1 Percentage: 4.66
|
||||
Size: 255 Mflop/s: 1720.39 Percentage: 4.67
|
||||
Size: 256 Mflop/s: 777.65 Percentage: 2.11
|
||||
Size: 257 Mflop/s: 1729.27 Percentage: 4.70
|
||||
Size: 319 Mflop/s: 1704.8 Percentage: 4.63
|
||||
Size: 320 Mflop/s: 1414.84 Percentage: 3.84
|
||||
Size: 321 Mflop/s: 1741.3 Percentage: 4.73
|
||||
Size: 417 Mflop/s: 1733 Percentage: 4.71
|
||||
Size: 479 Mflop/s: 1731.17 Percentage: 4.70
|
||||
Size: 480 Mflop/s: 1678.77 Percentage: 4.56
|
||||
Size: 511 Mflop/s: 1733.6 Percentage: 4.71
|
||||
Size: 512 Mflop/s: 782.96 Percentage: 2.13
|
||||
Size: 639 Mflop/s: 1714.42 Percentage: 4.66
|
||||
Size: 640 Mflop/s: 663.418 Percentage: 1.80
|
||||
Size: 767 Mflop/s: 1690.82 Percentage: 4.59
|
||||
Size: 768 Mflop/s: 792.043 Percentage: 2.15
|
||||
Size: 769 Mflop/s: 1696.95 Percentage: 4.61
|
||||
#Average percentage of Peak = 4.47314
|
||||
Size: 31 Mflop/s: 3140.45 Percentage: 8.53
|
||||
Size: 32 Mflop/s: 3364.78 Percentage: 9.14
|
||||
Size: 96 Mflop/s: 2703.08 Percentage: 7.35
|
||||
Size: 97 Mflop/s: 2729.68 Percentage: 7.42
|
||||
Size: 127 Mflop/s: 2556.58 Percentage: 6.95
|
||||
Size: 128 Mflop/s: 1803.41 Percentage: 4.90
|
||||
Size: 129 Mflop/s: 2669.26 Percentage: 7.25
|
||||
Size: 191 Mflop/s: 2290.09 Percentage: 6.22
|
||||
Size: 192 Mflop/s: 1801.66 Percentage: 4.90
|
||||
Size: 229 Mflop/s: 2218.61 Percentage: 6.03
|
||||
Size: 255 Mflop/s: 2178.15 Percentage: 5.92
|
||||
Size: 256 Mflop/s: 808.413 Percentage: 2.20
|
||||
Size: 257 Mflop/s: 2238.93 Percentage: 6.08
|
||||
Size: 319 Mflop/s: 2174.45 Percentage: 5.91
|
||||
Size: 320 Mflop/s: 1612.13 Percentage: 4.38
|
||||
Size: 321 Mflop/s: 2173.64 Percentage: 5.91
|
||||
Size: 417 Mflop/s: 2125.36 Percentage: 5.78
|
||||
Size: 479 Mflop/s: 2107.13 Percentage: 5.73
|
||||
Size: 480 Mflop/s: 1848.43 Percentage: 5.02
|
||||
Size: 511 Mflop/s: 2112.99 Percentage: 5.74
|
||||
Size: 512 Mflop/s: 801.127 Percentage: 2.18
|
||||
Size: 639 Mflop/s: 1881.94 Percentage: 5.11
|
||||
Size: 640 Mflop/s: 815.847 Percentage: 2.22
|
||||
Size: 767 Mflop/s: 1825.75 Percentage: 4.96
|
||||
Size: 768 Mflop/s: 812.933 Percentage: 2.21
|
||||
Size: 769 Mflop/s: 1825.38 Percentage: 4.96
|
||||
#Average percentage of Peak = 5.4996
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
#Description: Reference dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 23449.2 Percentage: 63.72
|
||||
Size: 32 Mflop/s: 28198.9 Percentage: 76.63
|
||||
Size: 96 Mflop/s: 32542.3 Percentage: 88.43
|
||||
Size: 97 Mflop/s: 29801.3 Percentage: 80.98
|
||||
Size: 127 Mflop/s: 28557.8 Percentage: 77.60
|
||||
Size: 128 Mflop/s: 32643.3 Percentage: 88.70
|
||||
Size: 129 Mflop/s: 31198.2 Percentage: 84.78
|
||||
Size: 191 Mflop/s: 32247.3 Percentage: 87.63
|
||||
Size: 192 Mflop/s: 32830.6 Percentage: 89.21
|
||||
Size: 229 Mflop/s: 34360.9 Percentage: 93.37
|
||||
Size: 255 Mflop/s: 33477.7 Percentage: 90.97
|
||||
Size: 256 Mflop/s: 33473.9 Percentage: 90.96
|
||||
Size: 257 Mflop/s: 33686.5 Percentage: 91.54
|
||||
Size: 319 Mflop/s: 34335.2 Percentage: 93.30
|
||||
Size: 320 Mflop/s: 36438.1 Percentage: 99.02
|
||||
Size: 321 Mflop/s: 35433.7 Percentage: 96.29
|
||||
Size: 417 Mflop/s: 36133.7 Percentage: 98.19
|
||||
Size: 479 Mflop/s: 32951.4 Percentage: 89.54
|
||||
Size: 480 Mflop/s: 37260 Percentage:101.25
|
||||
Size: 511 Mflop/s: 34128 Percentage: 92.74
|
||||
Size: 512 Mflop/s: 36526.4 Percentage: 99.26
|
||||
Size: 639 Mflop/s: 35249.2 Percentage: 95.79
|
||||
Size: 640 Mflop/s: 36538.7 Percentage: 99.29
|
||||
Size: 767 Mflop/s: 35718.5 Percentage: 97.06
|
||||
Size: 768 Mflop/s: 32116.8 Percentage: 87.27
|
||||
Size: 769 Mflop/s: 33033.9 Percentage: 89.77
|
||||
#Average percentage of Peak = 90.1266
|
||||
Size: 31 Mflop/s: 25677.4 Percentage: 69.78
|
||||
Size: 32 Mflop/s: 28952.1 Percentage: 78.67
|
||||
Size: 96 Mflop/s: 32816.4 Percentage: 89.18
|
||||
Size: 97 Mflop/s: 31699.2 Percentage: 86.14
|
||||
Size: 127 Mflop/s: 30274.5 Percentage: 82.27
|
||||
Size: 128 Mflop/s: 32721.7 Percentage: 88.92
|
||||
Size: 129 Mflop/s: 31746.4 Percentage: 86.27
|
||||
Size: 191 Mflop/s: 32263.1 Percentage: 87.67
|
||||
Size: 192 Mflop/s: 35491.2 Percentage: 96.44
|
||||
Size: 229 Mflop/s: 34557.2 Percentage: 93.91
|
||||
Size: 255 Mflop/s: 33771.3 Percentage: 91.77
|
||||
Size: 256 Mflop/s: 35221.1 Percentage: 95.71
|
||||
Size: 257 Mflop/s: 33807.9 Percentage: 91.87
|
||||
Size: 319 Mflop/s: 34415.8 Percentage: 93.52
|
||||
Size: 320 Mflop/s: 36500.2 Percentage: 99.19
|
||||
Size: 321 Mflop/s: 35508.1 Percentage: 96.49
|
||||
Size: 417 Mflop/s: 36157.6 Percentage: 98.25
|
||||
Size: 479 Mflop/s: 36186.4 Percentage: 98.33
|
||||
Size: 480 Mflop/s: 37971.3 Percentage:103.18
|
||||
Size: 511 Mflop/s: 35144 Percentage: 95.50
|
||||
Size: 512 Mflop/s: 37362.5 Percentage:101.53
|
||||
Size: 639 Mflop/s: 36989.1 Percentage:100.51
|
||||
Size: 640 Mflop/s: 38267.8 Percentage:103.99
|
||||
Size: 767 Mflop/s: 37220.8 Percentage:101.14
|
||||
Size: 768 Mflop/s: 38744 Percentage:105.28
|
||||
Size: 769 Mflop/s: 37076.1 Percentage:100.75
|
||||
#Average percentage of Peak = 93.7023
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
#Description: Block-based dgemm.
|
||||
|
||||
Size: 31 Mflop/s: 2112.63 Percentage: 5.74
|
||||
Size: 32 Mflop/s: 2187.44 Percentage: 5.94
|
||||
Size: 96 Mflop/s: 2325.39 Percentage: 6.32
|
||||
Size: 97 Mflop/s: 2322.81 Percentage: 6.31
|
||||
Size: 127 Mflop/s: 2330.3 Percentage: 6.33
|
||||
Size: 128 Mflop/s: 2282.93 Percentage: 6.20
|
||||
Size: 129 Mflop/s: 2334.25 Percentage: 6.34
|
||||
Size: 191 Mflop/s: 2345.91 Percentage: 6.37
|
||||
Size: 192 Mflop/s: 2345.38 Percentage: 6.37
|
||||
Size: 229 Mflop/s: 2351.01 Percentage: 6.39
|
||||
Size: 255 Mflop/s: 2335.21 Percentage: 6.35
|
||||
Size: 256 Mflop/s: 2306.48 Percentage: 6.27
|
||||
Size: 257 Mflop/s: 2330.68 Percentage: 6.33
|
||||
Size: 319 Mflop/s: 2360.03 Percentage: 6.41
|
||||
Size: 320 Mflop/s: 2364.53 Percentage: 6.43
|
||||
Size: 321 Mflop/s: 2366.38 Percentage: 6.43
|
||||
Size: 417 Mflop/s: 2378.34 Percentage: 6.46
|
||||
Size: 479 Mflop/s: 2233.05 Percentage: 6.07
|
||||
Size: 480 Mflop/s: 2187.87 Percentage: 5.95
|
||||
Size: 511 Mflop/s: 2224.61 Percentage: 6.05
|
||||
Size: 512 Mflop/s: 2284.85 Percentage: 6.21
|
||||
Size: 639 Mflop/s: 2292.78 Percentage: 6.23
|
||||
Size: 640 Mflop/s: 2264.7 Percentage: 6.15
|
||||
Size: 767 Mflop/s: 2324.83 Percentage: 6.32
|
||||
Size: 768 Mflop/s: 2363.92 Percentage: 6.42
|
||||
Size: 769 Mflop/s: 2321.31 Percentage: 6.31
|
||||
#Average percentage of Peak = 6.25811
|
||||
Size: 31 Mflop/s: 3844.56 Percentage: 10.45
|
||||
Size: 32 Mflop/s: 5342.55 Percentage: 14.52
|
||||
Size: 96 Mflop/s: 5620.08 Percentage: 15.27
|
||||
Size: 97 Mflop/s: 4754.1 Percentage: 12.92
|
||||
Size: 127 Mflop/s: 4977.82 Percentage: 13.53
|
||||
Size: 128 Mflop/s: 4817.8 Percentage: 13.09
|
||||
Size: 129 Mflop/s: 4594.25 Percentage: 12.48
|
||||
Size: 191 Mflop/s: 4931.27 Percentage: 13.40
|
||||
Size: 192 Mflop/s: 5549.67 Percentage: 15.08
|
||||
Size: 229 Mflop/s: 4982.59 Percentage: 13.54
|
||||
Size: 255 Mflop/s: 4528.43 Percentage: 12.31
|
||||
Size: 256 Mflop/s: 4652.68 Percentage: 12.64
|
||||
Size: 257 Mflop/s: 4512.33 Percentage: 12.26
|
||||
Size: 319 Mflop/s: 5093.38 Percentage: 13.84
|
||||
Size: 320 Mflop/s: 5674.61 Percentage: 15.42
|
||||
Size: 321 Mflop/s: 5111.09 Percentage: 13.89
|
||||
Size: 417 Mflop/s: 5143.98 Percentage: 13.98
|
||||
Size: 479 Mflop/s: 5152.51 Percentage: 14.00
|
||||
Size: 480 Mflop/s: 5703 Percentage: 15.50
|
||||
Size: 511 Mflop/s: 4479.96 Percentage: 12.17
|
||||
Size: 512 Mflop/s: 4596.26 Percentage: 12.49
|
||||
Size: 639 Mflop/s: 5168.59 Percentage: 14.05
|
||||
Size: 640 Mflop/s: 5232.97 Percentage: 14.22
|
||||
Size: 767 Mflop/s: 4701.09 Percentage: 12.77
|
||||
Size: 768 Mflop/s: 4826.12 Percentage: 13.11
|
||||
Size: 769 Mflop/s: 4686.21 Percentage: 12.73
|
||||
#Average percentage of Peak = 13.4488
|
||||
|
|
Reference in a new issue