From 3b9b6babbb86a791ef4970590c9878908ab2e2f3 Mon Sep 17 00:00:00 2001 From: Claudio Maggioni Date: Wed, 28 Sep 2022 13:20:37 +0200 Subject: [PATCH] hw1: blocked dgemm is correct --- .../matmult/benchmark.c | 2 +- .../matmult/dgemm-blocked.c | 51 +- .../matmult/timing.ps | 1179 +++++++++++++++++ .../matmult/timing_basic_dgemm.data | 29 + .../matmult/timing_blas_dgemm.data | 29 + .../matmult/timing_blocked_dgemm.data | 29 + 6 files changed, 1303 insertions(+), 16 deletions(-) create mode 100644 Project1/project_1_maggioni_claudio/matmult/timing.ps create mode 100644 Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data create mode 100644 Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data create mode 100644 Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data diff --git a/Project1/project_1_maggioni_claudio/matmult/benchmark.c b/Project1/project_1_maggioni_claudio/matmult/benchmark.c index 7ee3b86..8cb59ad 100644 --- a/Project1/project_1_maggioni_claudio/matmult/benchmark.c +++ b/Project1/project_1_maggioni_claudio/matmult/benchmark.c @@ -79,7 +79,7 @@ int main (int argc, char **argv) /* {31,32,33,63,64,65,95,96,97,127,128,129,159,160,161,191,192,193,223,224,225,255,256,257,287,288,289,319,320,321,351,352,353,383,384,385,415,416,417,447,448,449,479,480,481,511,512,513,543,544,545,575,576,577,607,608,609,639,640,641,671,672,673,703,704,705,735,736,737,767,768,769,799,800,801,831,832,833,863,864,865,895,896,897,927,928,929,959,960,961,991,992,993,1023,1024,1025}; */ /* A representative subset of the first list. Currently uncommented. */ - { 31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257, + { 31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257, 319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769 }; int nsizes = sizeof(test_sizes)/sizeof(test_sizes[0]); diff --git a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c index 17adcbf..05e941a 100644 --- a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c +++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c @@ -1,3 +1,4 @@ +#include /* Please include compiler name below (you may also include any other modules you would like to be loaded) @@ -13,25 +14,45 @@ LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKL */ -const char* dgemm_desc = "Naive, three-loop dgemm."; +const char* dgemm_desc = "Block-based dgemm."; + +const int block_size = 50; + +inline int min(int a, int b) { + return a < b ? a : b; +} + +inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, double* A, double* B, double* C) { + /* For each row i of A */ + for (int i = r_min; i < r_max; ++i) { + + /* For each column j of B */ + for (int j = c_min; j < c_max; ++j) { + + for(int k = k_min; k < k_max; k++) { + C[i + j * n] += A[i + k * n] * B[k + j * n]; + } + } + } +} /* This routine performs a dgemm operation * C := C + A * B * where A, B, and C are lda-by-lda matrices stored in column-major format. * On exit, A and B maintain their input values. */ -void square_dgemm (int n, double* A, double* B, double* C) -{ - // TODO: Implement the blocking optimization - - /* For each row i of A */ - for (int i = 0; i < n; ++i) - /* For each column j of B */ - for (int j = 0; j < n; ++j) - { - /* Compute C(i,j) */ - double cij = C[i+j*n]; - for( int k = 0; k < n; k++ ) - cij += A[i+k*n] * B[k+j*n]; - C[i+j*n] = cij; +void square_dgemm(int n, double* A, double* B, double* C) { + /* For each row i of A */ + for (int i = 0; i < n; i += block_size) { + int i_next = min(i + block_size, n); + + /* For each column j of B */ + for (int j = 0; j < n; j += block_size) { + int j_next = min(j + block_size, n); + + for (int k = 0; k < n; k += block_size) { + int k_next = min(k + block_size, n); + naivemm(i, i_next, k, k_next, j, j_next, n, A, B, C); + } + } } } diff --git a/Project1/project_1_maggioni_claudio/matmult/timing.ps b/Project1/project_1_maggioni_claudio/matmult/timing.ps new file mode 100644 index 0000000..5b3a60f --- /dev/null +++ b/Project1/project_1_maggioni_claudio/matmult/timing.ps @@ -0,0 +1,1179 @@ +%!PS-Adobe-2.0 +%%Title: timing.ps +%%Creator: gnuplot 5.2 patchlevel 8 +%%CreationDate: Wed Sep 28 13:13:45 2022 +%%DocumentFonts: (atend) +%%BoundingBox: 50 50 554 770 +%%Orientation: Landscape +%%Pages: (atend) +%%EndComments +%%BeginProlog +/gnudict 256 dict def +gnudict begin +% +% The following true/false flags may be edited by hand if desired. +% The unit line width and grayscale image gamma correction may also be changed. +% +/Color true def +/Blacktext false def +/Solid false def +/Dashlength 1 def +/Landscape true def +/Level1 false def +/Level3 false def +/Rounded false def +/ClipToBoundingBox false def +/SuppressPDFMark false def +/TransparentPatterns false def +/gnulinewidth 5.000 def +/userlinewidth gnulinewidth def +/Gamma 1.0 def +/BackgroundColor {-1.000 -1.000 -1.000} def +% +/vshift -46 def +/dl1 { + 10.0 Dashlength userlinewidth gnulinewidth div mul mul mul + Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if +} def +/dl2 { + 10.0 Dashlength userlinewidth gnulinewidth div mul mul mul + Rounded { currentlinewidth 0.75 mul add } if +} def +/hpt_ 31.5 def +/vpt_ 31.5 def +/hpt hpt_ def +/vpt vpt_ def +/doclip { + ClipToBoundingBox { + newpath 50 50 moveto 554 50 lineto 554 770 lineto 50 770 lineto closepath + clip + } if +} def +% +% Gnuplot Prolog Version 5.2 (Dec 2017) +% +%/SuppressPDFMark true def +% +/M {moveto} bind def +/L {lineto} bind def +/R {rmoveto} bind def +/V {rlineto} bind def +/N {newpath moveto} bind def +/Z {closepath} bind def +/C {setrgbcolor} bind def +/f {rlineto fill} bind def +/g {setgray} bind def +/Gshow {show} def % May be redefined later in the file to support UTF-8 +/vpt2 vpt 2 mul def +/hpt2 hpt 2 mul def +/Lshow {currentpoint stroke M 0 vshift R + Blacktext {gsave 0 setgray textshow grestore} {textshow} ifelse} def +/Rshow {currentpoint stroke M dup stringwidth pop neg vshift R + Blacktext {gsave 0 setgray textshow grestore} {textshow} ifelse} def +/Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R + Blacktext {gsave 0 setgray textshow grestore} {textshow} ifelse} def +/UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def + /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def +/DL {Color {setrgbcolor Solid {pop []} if 0 setdash} + {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def +/BL {stroke userlinewidth 2 mul setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +/AL {stroke userlinewidth 2 div setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +/UL {dup gnulinewidth mul /userlinewidth exch def + dup 1 lt {pop 1} if 10 mul /udl exch def} def +/PL {stroke userlinewidth setlinewidth + Rounded {1 setlinejoin 1 setlinecap} if} def +3.8 setmiterlimit +% Classic Line colors (version 5.0) +/LCw {1 1 1} def +/LCb {0 0 0} def +/LCa {0 0 0} def +/LC0 {1 0 0} def +/LC1 {0 1 0} def +/LC2 {0 0 1} def +/LC3 {1 0 1} def +/LC4 {0 1 1} def +/LC5 {1 1 0} def +/LC6 {0 0 0} def +/LC7 {1 0.3 0} def +/LC8 {0.5 0.5 0.5} def +% Default dash patterns (version 5.0) +/LTB {BL [] LCb DL} def +/LTw {PL [] 1 setgray} def +/LTb {PL [] LCb DL} def +/LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def +/LT0 {PL [] LC0 DL} def +/LT1 {PL [2 dl1 3 dl2] LC1 DL} def +/LT2 {PL [1 dl1 1.5 dl2] LC2 DL} def +/LT3 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC3 DL} def +/LT4 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def +/LT5 {PL [4 dl1 2 dl2] LC5 DL} def +/LT6 {PL [1.5 dl1 1.5 dl2 1.5 dl1 1.5 dl2 1.5 dl1 6 dl2] LC6 DL} def +/LT7 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC7 DL} def +/LT8 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC8 DL} def +/SL {[] 0 setdash} def +/Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def +/Dia {stroke [] 0 setdash 2 copy vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath stroke + Pnt} def +/Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V + currentpoint stroke M + hpt neg vpt neg R hpt2 0 V stroke + } def +/Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath stroke + Pnt} def +/Crs {stroke [] 0 setdash exch hpt sub exch vpt add M + hpt2 vpt2 neg V currentpoint stroke M + hpt2 neg 0 R hpt2 vpt2 V stroke} def +/TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V closepath stroke + Pnt} def +/Star {2 copy Pls Crs} def +/BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath fill} def +/TriUF {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V closepath fill} def +/TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath stroke + Pnt} def +/TriDF {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath fill} def +/DiaF {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath fill} def +/Pent {stroke [] 0 setdash 2 copy gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath stroke grestore Pnt} def +/PentF {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath fill grestore} def +/Circle {stroke [] 0 setdash 2 copy + hpt 0 360 arc stroke Pnt} def +/CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def +/C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def +/C1 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc closepath fill + vpt 0 360 arc closepath} bind def +/C2 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C3 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C4 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 180 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C5 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc + 2 copy moveto + 2 copy vpt 180 270 arc closepath fill + vpt 0 360 arc} bind def +/C6 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C7 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 270 arc closepath fill + vpt 0 360 arc closepath} bind def +/C8 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 270 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C9 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 270 450 arc closepath fill + vpt 0 360 arc closepath} bind def +/C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill + 2 copy moveto + 2 copy vpt 90 180 arc closepath fill + vpt 0 360 arc closepath} bind def +/C11 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 180 arc closepath fill + 2 copy moveto + 2 copy vpt 270 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C12 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 180 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C13 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 0 90 arc closepath fill + 2 copy moveto + 2 copy vpt 180 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/C14 {BL [] 0 setdash 2 copy moveto + 2 copy vpt 90 360 arc closepath fill + vpt 0 360 arc} bind def +/C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill + vpt 0 360 arc closepath} bind def +/Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto + neg 0 rlineto closepath} bind def +/Square {dup Rec} bind def +/Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def +/S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def +/S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def +/S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def +/S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def +/S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def +/S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill + exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def +/S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def +/S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill + 2 copy vpt Square fill Bsquare} bind def +/S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def +/S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def +/S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill + Bsquare} bind def +/S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill + Bsquare} bind def +/S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def +/S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill + 2 copy vpt Square fill Bsquare} bind def +/S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill + 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def +/S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def +/D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def +/D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def +/D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def +/D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def +/D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def +/D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def +/D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def +/D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def +/D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def +/D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def +/D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def +/D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def +/D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def +/D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def +/D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def +/D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def +/DiaE {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V closepath stroke} def +/BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V closepath stroke} def +/TriUE {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V closepath stroke} def +/TriDE {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V closepath stroke} def +/PentE {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + closepath stroke grestore} def +/CircE {stroke [] 0 setdash + hpt 0 360 arc stroke} def +/Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def +/DiaW {stroke [] 0 setdash vpt add M + hpt neg vpt neg V hpt vpt neg V + hpt vpt V hpt neg vpt V Opaque stroke} def +/BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M + 0 vpt2 neg V hpt2 0 V 0 vpt2 V + hpt2 neg 0 V Opaque stroke} def +/TriUW {stroke [] 0 setdash vpt 1.12 mul add M + hpt neg vpt -1.62 mul V + hpt 2 mul 0 V + hpt neg vpt 1.62 mul V Opaque stroke} def +/TriDW {stroke [] 0 setdash vpt 1.12 mul sub M + hpt neg vpt 1.62 mul V + hpt 2 mul 0 V + hpt neg vpt -1.62 mul V Opaque stroke} def +/PentW {stroke [] 0 setdash gsave + translate 0 hpt M 4 {72 rotate 0 hpt L} repeat + Opaque stroke grestore} def +/CircW {stroke [] 0 setdash + hpt 0 360 arc Opaque stroke} def +/BoxFill {gsave Rec 1 setgray fill grestore} def +/Density { + /Fillden exch def + currentrgbcolor + /ColB exch def /ColG exch def /ColR exch def + /ColR ColR Fillden mul Fillden sub 1 add def + /ColG ColG Fillden mul Fillden sub 1 add def + /ColB ColB Fillden mul Fillden sub 1 add def + ColR ColG ColB setrgbcolor} def +/BoxColFill {gsave Rec PolyFill} def +/PolyFill {gsave Density fill grestore grestore} def +/h {rlineto rlineto rlineto closepath gsave fill grestore stroke} bind def +% +% PostScript Level 1 Pattern Fill routine for rectangles +% Usage: x y w h s a XX PatternFill +% x,y = lower left corner of box to be filled +% w,h = width and height of box +% a = angle in degrees between lines and x-axis +% XX = 0/1 for no/yes cross-hatch +% +/PatternFill {gsave /PFa [ 9 2 roll ] def + PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate + PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec + TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse + clip + currentlinewidth 0.5 mul setlinewidth + /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def + 0 0 M PFa 5 get rotate PFs -2 div dup translate + 0 1 PFs PFa 4 get div 1 add floor cvi + {PFa 4 get mul 0 M 0 PFs V} for + 0 PFa 6 get ne { + 0 1 PFs PFa 4 get div 1 add floor cvi + {PFa 4 get mul 0 2 1 roll M PFs 0 V} for + } if + stroke grestore} def +% +/languagelevel where + {pop languagelevel} {1} ifelse +dup 2 lt + {/InterpretLevel1 true def + /InterpretLevel3 false def} + {/InterpretLevel1 Level1 def + 2 gt + {/InterpretLevel3 Level3 def} + {/InterpretLevel3 false def} + ifelse } + ifelse +% +% PostScript level 2 pattern fill definitions +% +/Level2PatternFill { +/Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8} + bind def +/KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke} +>> matrix makepattern +/Pat1 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke + 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke} +>> matrix makepattern +/Pat2 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L + 8 8 L 8 0 L 0 0 L fill} +>> matrix makepattern +/Pat3 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L + 0 12 M 12 0 L stroke} +>> matrix makepattern +/Pat4 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L + 0 -4 M 12 8 L stroke} +>> matrix makepattern +/Pat5 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L + 0 12 M 8 -4 L 4 12 M 10 0 L stroke} +>> matrix makepattern +/Pat6 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L + 0 -4 M 8 12 L 4 -4 M 10 8 L stroke} +>> matrix makepattern +/Pat7 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L + 12 0 M -4 8 L 12 4 M 0 10 L stroke} +>> matrix makepattern +/Pat8 exch def +<< Tile8x8 + /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L + -4 0 M 12 8 L -4 4 M 8 10 L stroke} +>> matrix makepattern +/Pat9 exch def +/Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def +/Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def +/Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def +/Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def +/Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def +/Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def +/Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def +} def +% +% +%End of PostScript Level 2 code +% +/PatternBgnd { + TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse +} def +% +% Substitute for Level 2 pattern fill codes with +% grayscale if Level 2 support is not selected. +% +/Level1PatternFill { +/Pattern1 {0.250 Density} bind def +/Pattern2 {0.500 Density} bind def +/Pattern3 {0.750 Density} bind def +/Pattern4 {0.125 Density} bind def +/Pattern5 {0.375 Density} bind def +/Pattern6 {0.625 Density} bind def +/Pattern7 {0.875 Density} bind def +} def +% +% Now test for support of Level 2 code +% +Level1 {Level1PatternFill} {Level2PatternFill} ifelse +% +/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont +dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall +currentdict end definefont pop +% +/Metrics {ExtendTextBox Gswidth} def +/Lwidth {currentpoint stroke M 0 vshift R Metrics} def +/Rwidth {currentpoint stroke M dup stringwidth pop neg vshift R Metrics} def +/Cwidth {currentpoint stroke M dup stringwidth pop -2 div vshift R Metrics} def +/GLwidth {currentpoint stroke M 0 vshift R {ExtendTextBox} forall} def +/GRwidth {currentpoint stroke M dup Gwidth vshift R {ExtendTextBox} forall} def +/GCwidth {currentpoint stroke M dup Gwidth 2 div vshift R {ExtendTextBox} forall} def +/GLwidth2 {0 Gwidth AddGlyphWidth} def +/GRwidth2 {Gwidth -1 mul 0 AddGlyphWidth} def +/GCwidth2 {Gwidth 2 div dup -1 mul AddGlyphWidth} def +/AddGlyphWidth { dup TBx2 gt {userdict /TBx2 3 -1 roll put} {pop} ifelse + dup TBx1 lt {userdict /TBx1 3 -1 roll put} {pop} ifelse } def +/MFshow { + { dup 5 get 3 ge + { 5 get 3 eq {gsave} {grestore} ifelse } + {dup dup 0 get findfont exch 1 get scalefont setfont + [ currentpoint ] exch dup 2 get 0 exch R dup 5 get 2 ne {dup dup 6 + get exch 4 get {textshow} {Metrics pop 0 R} ifelse }if dup 5 get 0 eq + {dup 3 get {2 get neg 0 exch R pop} {pop aload pop M} ifelse} {dup 5 + get 1 eq {dup 2 get exch dup 3 get exch 6 get Gswidth pop -2 div + dup 0 R} {dup 6 get Gswidth pop -2 div 0 R 6 get + textshow 2 index {aload pop M neg 3 -1 roll neg R pop pop} {pop pop pop + pop aload pop M} ifelse }ifelse }ifelse } + ifelse } + forall} def +/Gswidth {dup type /stringtype eq {stringwidth} {pop (n) stringwidth} ifelse} def +/MFwidth {0 exch { dup 5 get 3 ge { 5 get 3 eq { 0 } { pop } ifelse } + {dup 3 get{dup dup 0 get findfont exch 1 get scalefont setfont + 6 get Gswidth pop add} {pop} ifelse} ifelse} forall} def +/MLshow { currentpoint stroke M + 0 exch R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/MRshow { currentpoint stroke M + exch dup MFwidth neg 3 -1 roll R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/MCshow { currentpoint stroke M + exch dup MFwidth -2 div 3 -1 roll R + Blacktext {gsave 0 setgray MFshow grestore} {MFshow} ifelse } bind def +/XYsave { [( ) 1 2 true false 3 ()] } bind def +/XYrestore { [( ) 1 2 true false 4 ()] } bind def +Level1 SuppressPDFMark or +{} { +/SDict 10 dict def +systemdict /pdfmark known not { + userdict /pdfmark systemdict /cleartomark get put +} if +SDict begin [ + /Title (timing.ps) + /Subject (gnuplot plot) + /Creator (gnuplot 5.2 patchlevel 8) +% /Producer (gnuplot) +% /Keywords () + /CreationDate (Wed Sep 28 13:13:45 2022) + /DOCINFO pdfmark +end +} ifelse +% +% Support for boxed text - Ethan A Merritt Sep 2016 +% +/InitTextBox { userdict /TBy2 3 -1 roll put userdict /TBx2 3 -1 roll put + userdict /TBy1 3 -1 roll put userdict /TBx1 3 -1 roll put + /Boxing true def } def +/ExtendTextBox { dup type /stringtype eq + { Boxing { gsave dup false charpath pathbbox + dup TBy2 gt {userdict /TBy2 3 -1 roll put} {pop} ifelse + dup TBx2 gt {userdict /TBx2 3 -1 roll put} {pop} ifelse + dup TBy1 lt {userdict /TBy1 3 -1 roll put} {pop} ifelse + dup TBx1 lt {userdict /TBx1 3 -1 roll put} {pop} ifelse + grestore } if } + {} ifelse} def +/PopTextBox { newpath TBx1 TBxmargin sub TBy1 TBymargin sub M + TBx1 TBxmargin sub TBy2 TBymargin add L + TBx2 TBxmargin add TBy2 TBymargin add L + TBx2 TBxmargin add TBy1 TBymargin sub L closepath } def +/DrawTextBox { PopTextBox stroke /Boxing false def} def +/FillTextBox { gsave PopTextBox fill grestore /Boxing false def} def +0 0 0 0 InitTextBox +/TBxmargin 20 def +/TBymargin 20 def +/Boxing false def +/textshow { ExtendTextBox Gshow } def +% +end +%%EndProlog +%%Page: 1 1 +gnudict begin +gsave +doclip +50 50 translate +0.100 0.100 scale +90 rotate +0 -5040 translate +0 setgray +newpath +(Helvetica) findfont 140 scalefont setfont +BackgroundColor 0 lt 3 1 roll 0 lt exch 0 lt or or not {gsave BackgroundColor C clippath fill grestore} if +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +938 448 M +6009 0 V +stroke +1.000 UL +LTb +LCb setrgbcolor +938 448 M +63 0 V +5946 0 R +-63 0 V +stroke +854 448 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 100)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +/vshift -46 def +1.000 UL +LTb +LCb setrgbcolor +938 867 M +31 0 V +5978 0 R +-31 0 V +938 1111 M +31 0 V +5978 0 R +-31 0 V +938 1285 M +31 0 V +5978 0 R +-31 0 V +938 1420 M +31 0 V +5978 0 R +-31 0 V +938 1530 M +31 0 V +5978 0 R +-31 0 V +938 1623 M +31 0 V +5978 0 R +-31 0 V +938 1704 M +31 0 V +5978 0 R +-31 0 V +938 1775 M +31 0 V +5978 0 R +-31 0 V +stroke +0.500 UL +LTa +LCa setrgbcolor +938 1838 M +6009 0 V +stroke +1.000 UL +LTb +LCb setrgbcolor +938 1838 M +63 0 V +5946 0 R +-63 0 V +stroke +854 1838 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 1000)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +938 2257 M +31 0 V +5978 0 R +-31 0 V +938 2502 M +31 0 V +5978 0 R +-31 0 V +938 2675 M +31 0 V +5978 0 R +-31 0 V +938 2810 M +31 0 V +5978 0 R +-31 0 V +938 2920 M +31 0 V +5978 0 R +-31 0 V +938 3013 M +31 0 V +5978 0 R +-31 0 V +938 3094 M +31 0 V +5978 0 R +-31 0 V +938 3165 M +31 0 V +5978 0 R +-31 0 V +stroke +0.500 UL +LTa +LCa setrgbcolor +938 3229 M +6009 0 V +stroke +1.000 UL +LTb +LCb setrgbcolor +938 3229 M +63 0 V +5946 0 R +-63 0 V +stroke +854 3229 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 10000)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +938 3647 M +31 0 V +5978 0 R +-31 0 V +938 3892 M +31 0 V +5978 0 R +-31 0 V +938 4066 M +31 0 V +5978 0 R +-31 0 V +938 4200 M +31 0 V +5978 0 R +-31 0 V +938 4311 M +31 0 V +5978 0 R +-31 0 V +938 4404 M +31 0 V +5978 0 R +-31 0 V +938 4484 M +31 0 V +5978 0 R +-31 0 V +938 4555 M +31 0 V +5978 0 R +-31 0 V +stroke +0.500 UL +LTa +LCa setrgbcolor +938 4619 M +6009 0 V +stroke +1.000 UL +LTb +LCb setrgbcolor +938 4619 M +63 0 V +5946 0 R +-63 0 V +stroke +854 4619 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 100000)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +938 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +938 448 M +0 63 V +0 4108 R +0 -63 V +stroke +938 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 0)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +1689 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +1689 448 M +0 63 V +0 4108 R +0 -63 V +stroke +1689 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 100)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +2440 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +2440 448 M +0 63 V +0 4108 R +0 -63 V +stroke +2440 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 200)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +3191 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +3191 448 M +0 63 V +0 4108 R +0 -63 V +stroke +3191 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 300)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +3943 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +3943 448 M +0 63 V +0 4108 R +0 -63 V +stroke +3943 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 400)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +4694 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +4694 448 M +0 63 V +0 4108 R +0 -63 V +stroke +4694 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 500)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +5445 448 M +0 3688 V +0 420 R +0 63 V +stroke +1.000 UL +LTb +LCb setrgbcolor +5445 448 M +0 63 V +0 4108 R +0 -63 V +stroke +5445 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 600)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +6196 448 M +0 3688 V +0 420 R +0 63 V +stroke +1.000 UL +LTb +LCb setrgbcolor +6196 448 M +0 63 V +0 4108 R +0 -63 V +stroke +6196 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 700)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +0.500 UL +LTa +LCa setrgbcolor +6947 448 M +0 4171 V +stroke +1.000 UL +LTb +LCb setrgbcolor +6947 448 M +0 63 V +0 4108 R +0 -63 V +stroke +6947 308 M +[ [(Helvetica) 140.0 0.0 true true 0 ( 800)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +1.000 UL +LTb +LCb setrgbcolor +1.000 UL +LTB +LCb setrgbcolor +938 4619 N +938 448 L +6009 0 V +0 4171 V +-6009 0 V +Z stroke +1.000 UP +1.000 UL +LTb +LCb setrgbcolor +LCb setrgbcolor +133 2533 M +currentpoint gsave translate -270 rotate 0 0 moveto +[ [(Helvetica) 140.0 0.0 true true 0 (Performance \(GFlop/s\))] +] -46.7 MCshow +grestore +/Helvetica findfont 140 scalefont setfont +LTb +LCb setrgbcolor +3942 98 M +[ [(Helvetica) 140.0 0.0 true true 0 (Matrix size \(N\))] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +LTb +LCb setrgbcolor +3942 4829 M +[ [(Helvetica) 140.0 0.0 true true 0 (NxN matrix-matrix-multiplication on 4-Core Intel\(R\) Xeon\(R\) CPU E3-1585L v5 )] +XYsave +[(Helvetica) 140.0 0.0 true true 0 ( )] +XYrestore +[(Helvetica) 140.0 0.0 true true 0 (3.00GHz)] +] -46.7 MCshow +/Helvetica findfont 140 scalefont setfont +LTb +% Begin plot #1 +1.000 UP +1.000 UL +LTb +0.58 0.00 0.83 C +LCb setrgbcolor +6296 4486 M +[ [(Helvetica) 140.0 0.0 true true 0 (Naive dgemm)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UP +1.000 UL +LTb +0.58 0.00 0.83 C +6380 4486 M +399 0 V +1171 2295 M +7 69 V +481 -156 V +8 46 V +225 -35 V +7 -150 V +8 86 V +466 20 V +7 -109 V +278 98 V +195 2 V +8 -631 V +7 636 V +466 -5 V +8 -77 V +7 77 V +721 -1 V +466 1 V +7 -46 V +233 45 V +8 -654 V +954 640 V +7 -487 V +954 308 V +8 -337 V +7 488 V +1171 2295 Pls +1178 2364 Pls +1659 2208 Pls +1667 2254 Pls +1892 2219 Pls +1899 2069 Pls +1907 2155 Pls +2373 2175 Pls +2380 2066 Pls +2658 2164 Pls +2853 2166 Pls +2861 1535 Pls +2868 2171 Pls +3334 2166 Pls +3342 2089 Pls +3349 2166 Pls +4070 2165 Pls +4536 2166 Pls +4543 2120 Pls +4776 2165 Pls +4784 1511 Pls +5738 2151 Pls +5745 1664 Pls +6699 1972 Pls +6707 1635 Pls +6714 2123 Pls +6579 4486 Pls +% End plot #1 +% Begin plot #2 +1.000 UP +1.000 UL +LTb +0.00 0.62 0.45 C +LCb setrgbcolor +6296 4346 M +[ [(Helvetica) 140.0 0.0 true true 0 (Blocked dgemm)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UP +1.000 UL +LTb +0.00 0.62 0.45 C +6380 4346 M +399 0 V +1171 2175 M +7 -15 V +481 8 V +8 0 V +225 -3 V +7 -82 V +8 64 V +466 -5 V +7 -54 V +278 40 V +195 -6 V +8 -446 V +7 445 V +466 8 V +8 -121 V +7 108 V +721 14 V +466 -4 V +7 -36 V +233 16 V +8 -553 V +954 567 V +7 -549 V +954 539 V +8 -424 V +7 409 V +1171 2175 Crs +1178 2160 Crs +1659 2168 Crs +1667 2168 Crs +1892 2165 Crs +1899 2083 Crs +1907 2147 Crs +2373 2142 Crs +2380 2088 Crs +2658 2128 Crs +2853 2122 Crs +2861 1676 Crs +2868 2121 Crs +3334 2129 Crs +3342 2008 Crs +3349 2116 Crs +4070 2130 Crs +4536 2126 Crs +4543 2090 Crs +4776 2106 Crs +4784 1553 Crs +5738 2120 Crs +5745 1571 Crs +6699 2110 Crs +6707 1686 Crs +6714 2095 Crs +6579 4346 Crs +% End plot #2 +% Begin plot #3 +1.000 UP +1.000 UL +LTb +0.34 0.71 0.91 C +LCb setrgbcolor +6296 4206 M +[ [(Helvetica) 140.0 0.0 true true 0 (MKL blas dgemm)] +] -46.7 MRshow +/Helvetica findfont 140 scalefont setfont +1.000 UP +1.000 UL +LTb +0.34 0.71 0.91 C +6380 4206 M +399 0 V +1171 3733 M +7 79 V +481 134 V +8 -28 V +225 -40 V +7 64 V +8 -28 V +466 9 V +7 66 V +278 -17 V +195 -28 V +8 33 V +7 -30 V +466 2 V +8 59 V +7 -32 V +721 -6 V +466 17 V +7 9 V +233 -33 V +8 38 V +954 -23 V +7 12 V +954 8 V +8 23 V +7 -52 V +1171 3733 Star +1178 3812 Star +1659 3946 Star +1667 3918 Star +1892 3878 Star +1899 3942 Star +1907 3914 Star +2373 3923 Star +2380 3989 Star +2658 3972 Star +2853 3944 Star +2861 3977 Star +2868 3947 Star +3334 3949 Star +3342 4008 Star +3349 3976 Star +4070 3970 Star +4536 3987 Star +4543 3996 Star +4776 3963 Star +4784 4001 Star +5738 3978 Star +5745 3990 Star +6699 3998 Star +6707 4021 Star +6714 3969 Star +6579 4206 Star +% End plot #3 +2.000 UL +LTb +LCb setrgbcolor +1.000 UL +LTB +LCb setrgbcolor +938 4619 N +938 448 L +6009 0 V +0 4171 V +-6009 0 V +Z stroke +1.000 UP +1.000 UL +LTb +LCb setrgbcolor +stroke +grestore +end +showpage +%%Trailer +%%DocumentFonts: Helvetica +%%Pages: 1 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data new file mode 100644 index 0000000..5ed8266 --- /dev/null +++ b/Project1/project_1_maggioni_claudio/matmult/timing_basic_dgemm.data @@ -0,0 +1,29 @@ +#Description: Naive, three-loop dgemm. + +Size: 31 Mflop/s: 2131.35 Percentage: 5.79 +Size: 32 Mflop/s: 2387.28 Percentage: 6.49 +Size: 96 Mflop/s: 1844.52 Percentage: 5.01 +Size: 97 Mflop/s: 1991.74 Percentage: 5.41 +Size: 127 Mflop/s: 1878.09 Percentage: 5.10 +Size: 128 Mflop/s: 1466.11 Percentage: 3.98 +Size: 129 Mflop/s: 1688.41 Percentage: 4.59 +Size: 191 Mflop/s: 1747.28 Percentage: 4.75 +Size: 192 Mflop/s: 1458.67 Percentage: 3.96 +Size: 229 Mflop/s: 1714.53 Percentage: 4.66 +Size: 255 Mflop/s: 1719.28 Percentage: 4.67 +Size: 256 Mflop/s: 604.682 Percentage: 1.64 +Size: 257 Mflop/s: 1733.51 Percentage: 4.71 +Size: 319 Mflop/s: 1720.89 Percentage: 4.68 +Size: 320 Mflop/s: 1514.24 Percentage: 4.11 +Size: 321 Mflop/s: 1721.2 Percentage: 4.68 +Size: 417 Mflop/s: 1718.17 Percentage: 4.67 +Size: 479 Mflop/s: 1719.18 Percentage: 4.67 +Size: 480 Mflop/s: 1594.88 Percentage: 4.33 +Size: 511 Mflop/s: 1716.8 Percentage: 4.67 +Size: 512 Mflop/s: 581.233 Percentage: 1.58 +Size: 639 Mflop/s: 1678.33 Percentage: 4.56 +Size: 640 Mflop/s: 749.008 Percentage: 2.04 +Size: 767 Mflop/s: 1247.59 Percentage: 3.39 +Size: 768 Mflop/s: 714.52 Percentage: 1.94 +Size: 769 Mflop/s: 1603.09 Percentage: 4.36 +#Average percentage of Peak = 4.24797 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data new file mode 100644 index 0000000..acbacab --- /dev/null +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blas_dgemm.data @@ -0,0 +1,29 @@ +#Description: Reference dgemm. + +Size: 31 Mflop/s: 23035.3 Percentage: 62.60 +Size: 32 Mflop/s: 26290.9 Percentage: 71.44 +Size: 96 Mflop/s: 32829.1 Percentage: 89.21 +Size: 97 Mflop/s: 31312.6 Percentage: 85.09 +Size: 127 Mflop/s: 29329 Percentage: 79.70 +Size: 128 Mflop/s: 32578.6 Percentage: 88.53 +Size: 129 Mflop/s: 31113.1 Percentage: 84.55 +Size: 191 Mflop/s: 31590.5 Percentage: 85.84 +Size: 192 Mflop/s: 35219.4 Percentage: 95.70 +Size: 229 Mflop/s: 34236 Percentage: 93.03 +Size: 255 Mflop/s: 32692.5 Percentage: 88.84 +Size: 256 Mflop/s: 34510 Percentage: 93.78 +Size: 257 Mflop/s: 32844.7 Percentage: 89.25 +Size: 319 Mflop/s: 32950.9 Percentage: 89.54 +Size: 320 Mflop/s: 36332.4 Percentage: 98.73 +Size: 321 Mflop/s: 34460.3 Percentage: 93.64 +Size: 417 Mflop/s: 34136 Percentage: 92.76 +Size: 479 Mflop/s: 35101.8 Percentage: 95.39 +Size: 480 Mflop/s: 35608.8 Percentage: 96.76 +Size: 511 Mflop/s: 33768.6 Percentage: 91.76 +Size: 512 Mflop/s: 35947 Percentage: 97.68 +Size: 639 Mflop/s: 34572.5 Percentage: 93.95 +Size: 640 Mflop/s: 35268.1 Percentage: 95.84 +Size: 767 Mflop/s: 35731.4 Percentage: 97.10 +Size: 768 Mflop/s: 37114.6 Percentage:100.85 +Size: 769 Mflop/s: 34093.6 Percentage: 92.65 +#Average percentage of Peak = 90.1618 diff --git a/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data new file mode 100644 index 0000000..c50d31a --- /dev/null +++ b/Project1/project_1_maggioni_claudio/matmult/timing_blocked_dgemm.data @@ -0,0 +1,29 @@ +#Description: Naive, three-loop dgemm. + +Size: 31 Mflop/s: 1065.56 Percentage: 2.90 +Size: 32 Mflop/s: 1703.76 Percentage: 4.63 +Size: 96 Mflop/s: 1730.73 Percentage: 4.70 +Size: 97 Mflop/s: 1728.48 Percentage: 4.70 +Size: 127 Mflop/s: 1718.52 Percentage: 4.67 +Size: 128 Mflop/s: 1533.64 Percentage: 4.17 +Size: 129 Mflop/s: 1724.17 Percentage: 4.69 +Size: 191 Mflop/s: 1636.9 Percentage: 4.45 +Size: 192 Mflop/s: 1534.75 Percentage: 4.17 +Size: 229 Mflop/s: 1604.48 Percentage: 4.36 +Size: 255 Mflop/s: 1462.11 Percentage: 3.97 +Size: 256 Mflop/s: 730.562 Percentage: 1.99 +Size: 257 Mflop/s: 1483.12 Percentage: 4.03 +Size: 319 Mflop/s: 1409.3 Percentage: 3.83 +Size: 320 Mflop/s: 1303.95 Percentage: 3.54 +Size: 321 Mflop/s: 1621.34 Percentage: 4.41 +Size: 417 Mflop/s: 1496.69 Percentage: 4.07 +Size: 479 Mflop/s: 1518.7 Percentage: 4.13 +Size: 480 Mflop/s: 1429.18 Percentage: 3.88 +Size: 511 Mflop/s: 1371.7 Percentage: 3.73 +Size: 512 Mflop/s: 602.424 Percentage: 1.64 +Size: 639 Mflop/s: 1339.03 Percentage: 3.64 +Size: 640 Mflop/s: 913.949 Percentage: 2.48 +Size: 767 Mflop/s: 1566.19 Percentage: 4.26 +Size: 768 Mflop/s: 757.52 Percentage: 2.06 +Size: 769 Mflop/s: 1559.49 Percentage: 4.24 +#Average percentage of Peak = 3.81963