Merge branch 'master' of git.maggioni.xyz:maggicl/HPC

2022-09-27 09:00:20 +02:00 · 2022-09-27 09:00:20 +02:00 · 49e302e8bc
commit 49e302e8bc
parent 60a3e1b354 eba9ed889e
22 changed files with 3120 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,302 @@
 ## Core latex/pdflatex auxiliary files:
 *.aux
 *.lof
 *.log
 *.lot
 *.fls
 *.out
 *.toc
 *.fmt
 *.fot
 *.cb
 *.cb2
 .*.lb
 ## Intermediate documents:
 *.dvi
 *.xdv
 *-converted-to.*
 # these rules might exclude image files for figures etc.
 # *.ps
 # *.eps
 # *.pdf
 ## Generated if empty string is given at "Please type another file name for output:"
 .pdf
 ## Bibliography auxiliary files (bibtex/biblatex/biber):
 *.bbl
 *.bcf
 *.blg
 *-blx.aux
 *-blx.bib
 *.run.xml
 ## Build tool auxiliary files:
 *.fdb_latexmk
 *.synctex
 *.synctex(busy)
 *.synctex.gz
 *.synctex.gz(busy)
 *.pdfsync
 ## Build tool directories for auxiliary files
 # latexrun
 latex.out/
 ## Auxiliary and intermediate files from other packages:
 # algorithms
 *.alg
 *.loa
 # achemso
 acs-*.bib
 # amsthm
 *.thm
 # beamer
 *.nav
 *.pre
 *.snm
 *.vrb
 # changes
 *.soc
 # comment
 *.cut
 # cprotect
 *.cpt
 # elsarticle (documentclass of Elsevier journals)
 *.spl
 # endnotes
 *.ent
 # fixme
 *.lox
 # feynmf/feynmp
 *.mf
 *.mp
 *.t[1-9]
 *.t[1-9][0-9]
 *.tfm
 #(r)(e)ledmac/(r)(e)ledpar
 *.end
 *.?end
 *.[1-9]
 *.[1-9][0-9]
 *.[1-9][0-9][0-9]
 *.[1-9]R
 *.[1-9][0-9]R
 *.[1-9][0-9][0-9]R
 *.eledsec[1-9]
 *.eledsec[1-9]R
 *.eledsec[1-9][0-9]
 *.eledsec[1-9][0-9]R
 *.eledsec[1-9][0-9][0-9]
 *.eledsec[1-9][0-9][0-9]R
 # glossaries
 *.acn
 *.acr
 *.glg
 *.glo
 *.gls
 *.glsdefs
 *.lzo
 *.lzs
 *.slg
 *.slo
 *.sls
 # uncomment this for glossaries-extra (will ignore makeindex's style files!)
 # *.ist
 # gnuplot
 *.gnuplot
 *.table
 # gnuplottex
 *-gnuplottex-*
 # gregoriotex
 *.gaux
 *.glog
 *.gtex
 # htlatex
 *.4ct
 *.4tc
 *.idv
 *.lg
 *.trc
 *.xref
 # hyperref
 *.brf
 # knitr
 *-concordance.tex
 # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
 # *.tikz
 *-tikzDictionary
 # listings
 *.lol
 # luatexja-ruby
 *.ltjruby
 # makeidx
 *.idx
 *.ilg
 *.ind
 # minitoc
 *.maf
 *.mlf
 *.mlt
 *.mtc[0-9]*
 *.slf[0-9]*
 *.slt[0-9]*
 *.stc[0-9]*
 # minted
 _minted*
 *.pyg
 # morewrites
 *.mw
 # newpax
 *.newpax
 # nomencl
 *.nlg
 *.nlo
 *.nls
 # pax
 *.pax
 # pdfpcnotes
 *.pdfpc
 # sagetex
 *.sagetex.sage
 *.sagetex.py
 *.sagetex.scmd
 # scrwfile
 *.wrt
 # svg
 svg-inkscape/
 # sympy
 *.sout
 *.sympy
 sympy-plots-for-*.tex/
 # pdfcomment
 *.upa
 *.upb
 # pythontex
 *.pytxcode
 pythontex-files-*/
 # tcolorbox
 *.listing
 # thmtools
 *.loe
 # TikZ & PGF
 *.dpth
 *.md5
 *.auxlock
 # titletoc
 *.ptc
 # todonotes
 *.tdo
 # vhistory
 *.hst
 *.ver
 # easy-todo
 *.lod
 # xcolor
 *.xcp
 # xmpincl
 *.xmpi
 # xindy
 *.xdy
 # xypic precompiled matrices and outlines
 *.xyc
 *.xyd
 # endfloat
 *.ttt
 *.fff
 # Latexian
 TSWLatexianTemp*
 ## Editors:
 # WinEdt
 *.bak
 *.sav
 # Texpad
 .texpadtmp
 # LyX
 *.lyx~
 # Kile
 *.backup
 # gummi
 .*.swp
 # KBibTeX
 *~[0-9]*
 # TeXnicCenter
 *.tps
 # auto folder when using emacs and auctex
 ./auto/*
 *.el
 # expex forward references with \gathertags
 *-tags.tex
 # standalone packages
 *.sta
 # Makeindex log files
 *.lpz
 # xwatermark package
 *.xwm
 # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
 # option is specified. Footnotes are then stored in a file with suffix Notes.bib.
 # Uncomment the next line to have this generated file ignored.
 #*Notes.bib
--- a/Project0/project0.pdf
+++ b/Project0/project0.pdf
--- a/Project1/Makefile
+++ b/Project1/Makefile
@ -0,0 +1,13 @@
 # Base name (without extension) of the LaTeX report.
 filename=project_1_maggioni_claudio
 # Extensions of the LaTeX by-products deleted by `clean`; the PDF is kept.
 aux_exts=aux bbl blg dvi log nav out ps snm toc vrb
 # pdf, read and clean are commands, not files: always run their recipes.
 .PHONY: pdf read clean
 # Run pdflatex twice on the report, then delete the by-products.
 # $(MAKE) (rather than a literal `make`) forwards flags such as -n and -j.
 pdf:
 	pdflatex ${filename}
 	pdflatex ${filename}
 	$(MAKE) clean
 # Open the PDF in evince as a background process.
 read:
 	evince ${filename}.pdf &
 # Delete the LaTeX by-products listed in aux_exts.
 clean:
 	rm -f $(addprefix ${filename}.,${aux_exts})
--- a/Project1/assignment.sty
+++ b/Project1/assignment.sty
@ -0,0 +1,95 @@
 \usepackage{ifthen}
 \usepackage[utf8]{inputenc}
 \usepackage{graphics}
 \usepackage{graphicx}
 \usepackage{hyperref}
 \pagestyle{plain}
 \voffset -5mm
 \oddsidemargin  0mm
 \evensidemargin -11mm
 \marginparwidth 2cm
 \marginparsep 0pt
 \topmargin 0mm
 \headheight 0pt
 \headsep 0pt
 \topskip 0pt        
 \textheight 255mm
 \textwidth 165mm
 % \duedate: text printed as the due date; empty until \setduedate is used.
 \newcommand{\duedate} {}
 % \setduedate{<date>}: redefine \duedate to "Due date: <date>".
 \newcommand{\setduedate}[1]{%
 \renewcommand\duedate {Due date:~ #1}}
 % \isassignment: flag holding the text "true" or "false" (initially false);
 % it is tested below through \boolean{\isassignment}.
 \newcommand\isassignment {false}
 % \setassignment: switch the flag to true.
 \newcommand{\setassignment}{\renewcommand\isassignment {true}}
 % \ifassignment{<text>}: typeset <text> only while the flag is true.
 \newcommand{\ifassignment}[1]{\ifthenelse{\boolean{\isassignment}}{#1}{}}
 % \ifnotassignment{<text>}: typeset <text> only while the flag is false.
 \newcommand{\ifnotassignment}[1]{\ifthenelse{\boolean{\isassignment}}{}{#1}}
 \newcommand{\assignmentpolicy}{
 \begin{table}[h]
 \begin{center}
 \scalebox{0.8} {%
 \begin{tabular}{|p{0.02cm}p{16cm}|}
 \hline
 &\\
 \multicolumn{2}{|c|}{\Large\textbf{HPC  2022 ---  Submission Instructions}}\\
 \multicolumn{2}{|c|}{\large\textbf{(Please note that the following instructions are mandatory: }}\\
 \multicolumn{2}{|c|}{\large\textbf{submissions that do not comply with them will not be considered)}}\\
 &\\
 \textbullet & Assignments must be submitted to \href{https://www.icorsi.ch/course/view.php?id=14652}{iCorsi} (i.e. in electronic format).\\
 \textbullet & Provide both executable package and sources (e.g. C/C++ files, Matlab). 
 If you are using libraries, please add them in the file. Sources must be organized in directories called:\\
 \multicolumn{2}{|c|}{\textit{Project\_number\_lastname\_firstname}}\\
 & and  the  file must be called:\\
 \multicolumn{2}{|c|}{\textit{project\_number\_lastname\_firstname.zip}}\\
 \multicolumn{2}{|c|}{\textit{project\_number\_lastname\_firstname.pdf}}\\
 \textbullet &  The TAs will grade your project by reviewing your project write-up, and looking at the implementation 
                 you attempted, and benchmarking your code's performance.\\
 \textbullet & You are allowed to discuss all questions with anyone you like; however: (i) your submission must list anyone you discussed problems with and (ii) you must write up your submission independently.\\
 \hline
 \end{tabular}
 }
 \end{center}
 \end{table}
 }
 % \punkte{<n>}: appends "(<n> Points)" to a heading, pushed to the right with
 % \hfill and set in medium-weight emphasis. <n> must be a number because it is
 % fed to \ifcase; the value 1 selects the singular "Point".
 \newcommand{\punkte}[1]{\hspace{1ex}\emph{\mdseries\hfill(#1~\ifcase#1{Points}\or{Point}\else{Points}\fi)}}
 % \serieheader{#1}{#2}{#3}{#4}{#5}{#6}: header printed at the top of a sheet.
 %   #1 / #2  bold text at the left / right end of the first line
 %   #3 / #4  text at the left / right end of the second line
 %   #5       large bold title set between the two horizontal rules
 %   #6       text set to the right of the title; replaced by \duedate when
 %            the \isassignment flag is true
 % The macro also selects the `empty` page style for the current page and
 % includes the image usi_inf.png at 0.4\textwidth.
 \newcommand\serieheader[6]{
 \thispagestyle{empty}%
 \begin{flushleft}
 \includegraphics[width=0.4\textwidth]{usi_inf.png}
 \end{flushleft}
  \noindent%
  {\large\ignorespaces{\textbf{#1}}\hspace{\fill}\ignorespaces{ \textbf{#2}}}\\ \\%
  {\large\ignorespaces #3 \hspace{\fill}\ignorespaces #4}\\
  \noindent%
  \bigskip
  \hrule\par\bigskip\noindent%
  \bigskip {\ignorespaces {\Large{\textbf{#5}}}
  \hspace{\fill}\ignorespaces \large \ifthenelse{\boolean{\isassignment}}{\duedate}{#6}}
  \hrule\par\bigskip\noindent%  \linebreak
 }
 % enumerateMod: list environment modelled on "enumerate" (see the inline note
 % below) with \topsep, \partopsep and \itemsep all set to 0pt and labels
 % typeset through \hss\llap. It shares the \@enumdepth counter and calls
 % \@toodeep once that counter already exceeds 3. \makeatletter/\makeatother
 % bracket the definition because it uses names containing @.
 \makeatletter
 \def\enumerateMod{\ifnum \@enumdepth >3 \@toodeep\else
      \advance\@enumdepth \@ne
      \edef\@enumctr{enum\romannumeral\the\@enumdepth}\list
      {\csname label\@enumctr\endcsname}{\usecounter
        {\@enumctr}%%%? the following differs from "enumerate"
 	\topsep0pt%
 	\partopsep0pt%
 	\itemsep0pt%
 	\def\makelabel##1{\hss\llap{##1}}}\fi}
 \let\endenumerateMod =\endlist
 \makeatother
 \usepackage{textcomp}
--- a/Project1/project1.pdf
+++ b/Project1/project1.pdf
--- a/Project1/project_1_maggioni_claudio.pdf
+++ b/Project1/project_1_maggioni_claudio.pdf
--- a/Project1/project_1_maggioni_claudio.tex
+++ b/Project1/project_1_maggioni_claudio.tex
@ -0,0 +1,79 @@
 \documentclass[unicode,11pt,a4paper,oneside,numbers=endperiod,openany]{scrartcl}
 \input{assignment.sty}
 \usepackage{fancyvrb}
 \begin{document}
 \setassignment
 \setduedate{12.10.2022 (midnight)}
 \serieheader{High-Performance Computing Lab}{2022}{Student: Claudio
 Maggioni}{Discussed with: ---}{Solution for Project 1}{}
 \newline
 \assignmentpolicy
 In this project you will practice memory access optimization, performance-oriented programming, and OpenMP parallelization
 on the ICS Cluster.
 \section{Explaining Memory Hierarchies \punkte{25}}
 By identifying the memory hierarchy parameters through \texttt{likwid-topology} 
 for the cache topology and \texttt{free -g} for the amount of primary memory I
 find the following values:
 \begin{center}
 \begin{tabular}{llll}
 Main memory & 62 GB              \\
 L3 cache    & 25 MB per socket   \\
 L2 cache    & 256 kB per core    \\
 L1 cache    & 32 kB per core    
 \end{tabular}
 \end{center}
 All values are reported using base 2 IEC byte units. The cluster has 2 sockets
 and a total of 20 cores (10 per socket). The cache topology diagram reported by
 \texttt{likwid-topology -g} is the following:
 \pagebreak[4]
 % https://tex.stackexchange.com/a/171818
 \begin{Verbatim}[fontsize=\tiny] 
 Socket 0:
 +---------------------------------------------------------------------------------------------------------------+
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | |    0   | |    1   | |    2   | |    3   | |    4   | |    5   | |    6   | |    7   | |    8   | |    9   | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +-----------------------------------------------------------------------------------------------------------+ |
 | |                                                   25 MB                                                   | |
 | +-----------------------------------------------------------------------------------------------------------+ |
 +---------------------------------------------------------------------------------------------------------------+
 Socket 1:
 +---------------------------------------------------------------------------------------------------------------+
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | |   10   | |   11   | |   12   | |   13   | |   14   | |   15   | |   16   | |   17   | |   18   | |   19   | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |  32 kB | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
 | +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
 | +-----------------------------------------------------------------------------------------------------------+ |
 | |                                                   25 MB                                                   | |
 | +-----------------------------------------------------------------------------------------------------------+ |
 +---------------------------------------------------------------------------------------------------------------+
 \end{Verbatim}
 \section{Optimize Square Matrix-Matrix Multiplication  \punkte{60}}
 \section{Quality of the Report  \punkte{15}}
 \end{document}
--- a/Project1/project_1_maggioni_claudio/matmult/Makefile
+++ b/Project1/project_1_maggioni_claudio/matmult/Makefile
@ -0,0 +1,33 @@
 # On Euler, we will benchmark your DGEMM's performance against the performance
 # of the default vendor-tuned DGEMM. This is done in benchmark-blas.
 #
 CC = gcc
 OPT = -O2
 CFLAGS = -Wall -std=gnu99 $(OPT)
 LDFLAGS = -Wall
 # librt is needed for clock_gettime; the other entries link against the
 # sequential MKL libraries located under $(MKLROOT).
 LDLIBS = -lrt -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lpthread -lm -ldl -m64 -I${MKLROOT}/include
 targets = benchmark-naive benchmark-blocked benchmark-blas
 objects = benchmark.o dgemm-naive.o dgemm-blocked.o dgemm-blas.o
 # default, all and clean are commands, not files.
 .PHONY : default all clean
 default : all
 # Rebuild everything from scratch. The build runs in a sub-make so that it
 # starts only after `clean` has finished, which also holds under `make -j`
 # (as plain prerequisites, `clean` and the targets could run concurrently).
 all : clean
 	$(MAKE) $(targets)
 # Each benchmark links the common driver with one square_dgemm implementation.
 benchmark-naive : benchmark.o dgemm-naive.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
 benchmark-blocked : benchmark.o dgemm-blocked.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
 benchmark-blas : benchmark.o dgemm-blas.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS)
 # Compile one C source into the object file named by the target.
 %.o : %.c
 	$(CC) -c $(CFLAGS) -o $@ $<
 # Remove the benchmark executables and the object files.
 clean :
 	rm -f $(targets) $(objects)
--- a/Project1/project_1_maggioni_claudio/matmult/benchmark.c
+++ b/Project1/project_1_maggioni_claudio/matmult/benchmark.c
@ -0,0 +1,174 @@
 #include <stdlib.h> // For: exit, drand48, malloc, free, NULL, EXIT_FAILURE
 #include <stdio.h>  // For: perror
 #include <string.h> // For: memset
 #include <float.h>  // For: DBL_EPSILON
 #include <math.h>   // For: fabs
 #ifdef GETTIMEOFDAY
 #include <sys/time.h> // For struct timeval, gettimeofday
 #else
 #include <time.h> // For struct timespec, clock_gettime, CLOCK_MONOTONIC
 #endif
 // On icsmaster
 // 2.3 GHz * 8 vector width * 2 flops for FMA = 36.8 GF/s
 #define MAX_SPEED 36.8
 /* reference_dgemm wraps a call to the BLAS-3 routine DGEMM, via the standard FORTRAN interface - hence the reference semantics. */ 
 #define DGEMM dgemm_
 extern void DGEMM (char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); 
 /* Calls the BLAS routine behind the DGEMM macro on N-by-N operands with
  * scaling factor ALPHA for the product and 1 for the existing contents of C,
  * without transposing A or B ('N'). The Fortran interface takes every
  * argument by address, so each scalar is first copied into a local. */
 void reference_dgemm (int N, double ALPHA, double* A, double* B, double* C)
 {
   /* Matrix dimensions and leading dimensions: all equal to N. */
   int rows = N, inner = N;
   int lda = N, ldb = N, ldc = N;
   /* Operand flags and the factor applied to C. */
   char trans_a = 'N', trans_b = 'N';
   double beta = 1.;
   DGEMM (&trans_a, &trans_b, &rows, &N, &inner, &ALPHA,
          A, &lda, B, &ldb, &beta, C, &ldc);
 }
 /* Your function must have the following signature: */
 extern const char* dgemm_desc;
 extern void square_dgemm (int, double*, double*, double*);
 /* Returns a time stamp in seconds as a double.
  * With GETTIMEOFDAY defined the value is read with gettimeofday (seconds plus
  * microseconds); otherwise with clock_gettime on CLOCK_MONOTONIC (seconds
  * plus nanoseconds). */
 double wall_time ()
 {
 #ifdef GETTIMEOFDAY
   struct timeval now;
   gettimeofday (&now, NULL);
   const double whole = 1.*now.tv_sec;
   const double fraction = 1.e-6*now.tv_usec;
 #else
   struct timespec now;
   clock_gettime (CLOCK_MONOTONIC, &now);
   const double whole = 1.*now.tv_sec;
   const double fraction = 1.e-9*now.tv_nsec;
 #endif
   return whole + fraction;
 }
 /* Prints `message` through perror, which follows it with the description of
  * the current errno value, and then ends the process with EXIT_FAILURE.
  * This function does not return. */
 void die (const char* message)
 {
  perror (message);
  exit (EXIT_FAILURE);
 }
 /* Stores 2 * drand48() - 1 into p[0], p[1], ..., p[n-1], in that order.
  * Nothing is written when n is zero or negative. */
 void fill (double* p, int n)
 {
   for (int remaining = n; remaining > 0; --remaining)
     *p++ = 2 * drand48() - 1;
 }
 /* Replaces each of the first n elements of p by its absolute value (fabs).
  * Nothing is touched when n is zero or negative. */
 void absolute_value (double *p, int n)
 {
   for (double *cur = p; cur - p < n; ++cur)
     *cur = fabs (*cur);
 }
 /* The benchmarking program.
  *
  * For every size n in test_sizes it fills three n-by-n matrices with random
  * values, times square_dgemm (doubling the number of calls per batch until a
  * batch lasts at least `timeout` seconds), prints the Mflop/s rate together
  * with the percentage of MAX_SPEED, and finally validates one product against
  * reference_dgemm with a componentwise error bound.
  * After all sizes it prints the average percentage.
  *
  * Returns 0 on success. Exits with EXIT_FAILURE when the buffer cannot be
  * allocated or when a result violates the error bound.
  * argc and argv are not used. */
 int main (int argc, char **argv)
 {
   printf ("#Description:\t%s\n\n", dgemm_desc);
   /* Test sizes should highlight performance dips at multiples of certain powers-of-two */
   int test_sizes[] =
   /* Multiples-of-32, +/- 1. Currently commented. */
   /* {31,32,33,63,64,65,95,96,97,127,128,129,159,160,161,191,192,193,223,224,225,255,256,257,287,288,289,319,320,321,351,352,353,383,384,385,415,416,417,447,448,449,479,480,481,511,512,513,543,544,545,575,576,577,607,608,609,639,640,641,671,672,673,703,704,705,735,736,737,767,768,769,799,800,801,831,832,833,863,864,865,895,896,897,927,928,929,959,960,961,991,992,993,1023,1024,1025}; */
   /* A representative subset of the first list. Currently uncommented. */
   { 31, 32, 96, 97, 127, 128, 129, 191, 192, 229, 255, 256, 257,
     319, 320, 321, 417, 479, 480, 511, 512, 639, 640, 767, 768, 769 };
   int nsizes = sizeof(test_sizes)/sizeof(test_sizes[0]);
   /* The buffer must fit the largest size, wherever it appears in the list */
   int nmax = 0;
   for (int i = 0; i < nsizes; ++i)
     if (test_sizes[i] > nmax)
       nmax = test_sizes[i];
   /* allocate memory for all problems: three nmax-by-nmax matrices.
    * The element count is computed in size_t, not in int arithmetic. */
   size_t max_elems = (size_t) nmax * (size_t) nmax;
   double* buf = (double*) malloc (3 * max_elems * sizeof(double));
   if (buf == NULL) die ("failed to allocate largest problem size");
   double Mflops_s[nsizes],per[nsizes],aveper;
   /* For each test size */
   for (int isize = 0; isize < nsizes; ++isize)
   {
     /* Create and fill 3 random matrices A,B,C*/
     int n = test_sizes[isize];
     double* A = buf;
     double* B = A + max_elems;
     double* C = B + max_elems;
     fill (A, n*n);
     fill (B, n*n);
     fill (C, n*n);
     /* Measure performance (in Gflops/s). */
     /* Time a "sufficiently long" sequence of calls to reduce noise */
     double Gflops_s = 0.0, seconds = -1.0;
     double timeout = 0.1; // "sufficiently long" := at least 1/10 second.
     /* seconds starts below timeout, so the body runs at least once */
     for (int n_iterations = 1; seconds < timeout; n_iterations *= 2)
     {
       /* Warm-up */
       square_dgemm (n, A, B, C);
       /* Benchmark n_iterations runs of square_dgemm */
       seconds = -wall_time();
       for (int it = 0; it < n_iterations; ++it)
         square_dgemm (n, A, B, C);
       seconds += wall_time();
       /*  compute Gflop/s rate */
       Gflops_s = 2.e-9 * n_iterations * n * n * n / seconds;
     }
     /* Storing Mflop rate and calculating percentage of peak */
     Mflops_s[isize] = Gflops_s*1000;
     per[isize] = Gflops_s*100/MAX_SPEED;
     printf ("Size: %d\tMflop/s: %8g\tPercentage:%6.2lf\n", n, Mflops_s[isize],per[isize]);
     /* Ensure that error does not exceed the theoretical error bound. */
     /* C := A * B, computed with square_dgemm */
     memset (C, 0, (size_t) n * n * sizeof(double));
     square_dgemm (n, A, B, C);
     /* Do not explicitly check that A and B were unmodified on square_dgemm exit
      *  - if they were, the following will most likely detect it:
      * C := C - A * B, computed with reference_dgemm */
     reference_dgemm(n, -1., A, B, C);
     /* A := |A|, B := |B|, C := |C| */
     absolute_value (A, n * n);
     absolute_value (B, n * n);
     absolute_value (C, n * n);
     /* C := |C| - 3 * e_mach * n * |A| * |B|, computed with reference_dgemm */
     reference_dgemm (n, -3.*DBL_EPSILON*n, A, B, C);
     /* If any element in C is positive, then something went wrong in square_dgemm.
      * This is not a libc failure, so the message goes straight to stderr
      * instead of through die(), whose perror would append errno text. */
     for (int i = 0; i < n * n; ++i)
       if (C[i] > 0)
       {
         fprintf (stderr, "*** FAILURE *** Error in matrix multiply exceeds componentwise error bounds.\n");
         exit (EXIT_FAILURE);
       }
   }
   /* Calculating average percentage of peak reached by algorithm */
   aveper=0;
   for (int i=0; i<nsizes;i++)
     aveper+= per[i];
   aveper/=nsizes*1.0;
   /* Printing average percentage to screen */
   printf("#Average percentage of Peak = %g\n",aveper);
   free (buf);
   return 0;
 }
--- a/Project1/project_1_maggioni_claudio/matmult/dgemm-blas.c
+++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-blas.c
@ -0,0 +1,38 @@
 /* 
    Please include compiler name below (you may also include any other modules you would like to be loaded)
 COMPILER= gnu
    Please include All compiler flags and libraries as you want them run. You can simply copy this over from the Makefile's first few lines
 CC = cc
 OPT = -O3
 CFLAGS = -Wall -std=gnu99 $(OPT)
 MKLROOT = /opt/intel/composer_xe_2013.1.117/mkl
 LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread -lm
 */
 #define DGEMM dgemm_
 extern void DGEMM (char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); 
 const char* dgemm_desc = "Reference dgemm.";
 /* square_dgemm, BLAS-backed variant.
  * Performs C := C + A * B, where A, B, and C are N-by-N matrices stored in
  * column-major format; on exit, A and B maintain their input values.
  * The work is handed to the BLAS-3 routine DGEMM through the standard FORTRAN
  * interface, which takes every argument by address - hence the locals. */
 void square_dgemm (int N, double* A, double* B, double* C)
 {
   /* Both scaling factors are 1: the product is added to C unchanged. */
   double alpha = 1., beta = 1.;
   /* Dimensions and leading dimensions all equal N. */
   int rows = N, inner = N;
   int lda = N, ldb = N, ldc = N;
   /* 'N' for both operand flags. */
   char trans_a = 'N', trans_b = 'N';
   DGEMM (&trans_a, &trans_b, &rows, &N, &inner, &alpha,
          A, &lda, B, &ldb, &beta, C, &ldc);
 }
--- a/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c
+++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c
@ -0,0 +1,37 @@
 /* 
    Please include compiler name below (you may also include any other modules you would like to be loaded)
 COMPILER= gnu
    Please include All compiler flags and libraries as you want them run. You can simply copy this over from the Makefile's first few lines
 CC = cc
 OPT = -O3
 CFLAGS = -Wall -std=gnu99 $(OPT)
 MKLROOT = /opt/intel/composer_xe_2013.1.117/mkl
 LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread -lm
 */
 const char* dgemm_desc = "Blocked dgemm.";
 /* Edge length (in elements) of the square tiles used by square_dgemm.
  * Three 32-by-32 tiles of doubles take 24 kB. Override at compile time with
  * -DBLOCK_SIZE=<n> to tune for a different cache size. */
 #ifndef BLOCK_SIZE
 #define BLOCK_SIZE 32
 #endif
 /* This routine performs a dgemm operation
  *  C := C + A * B
  * where A, B, and C are n-by-n matrices stored in column-major format.
  * On exit, A and B maintain their input values.
  *
  * The index space is cut into BLOCK_SIZE-wide tiles (the last tile in each
  * direction may be narrower). Inside a tile the loops run j, k, i so that the
  * innermost loop walks down one column of A and one column of C with unit
  * stride. For every element C(i,j) the products are still added in increasing
  * k order, exactly as in the plain three-loop version. */
 void square_dgemm (int n, double* A, double* B, double* C)
 {
   for (int j0 = 0; j0 < n; j0 += BLOCK_SIZE)
   {
     const int j_end = (n - j0 < BLOCK_SIZE) ? n : j0 + BLOCK_SIZE;
     /* k tiles are visited in increasing order for a fixed column tile */
     for (int k0 = 0; k0 < n; k0 += BLOCK_SIZE)
     {
       const int k_end = (n - k0 < BLOCK_SIZE) ? n : k0 + BLOCK_SIZE;
       for (int i0 = 0; i0 < n; i0 += BLOCK_SIZE)
       {
         const int i_end = (n - i0 < BLOCK_SIZE) ? n : i0 + BLOCK_SIZE;
         /* C(i0:i_end, j0:j_end) += A(i0:i_end, k0:k_end) * B(k0:k_end, j0:j_end) */
         for (int j = j0; j < j_end; ++j)
         {
           double* c_col = C + j*n;
           for (int k = k0; k < k_end; ++k)
           {
             const double bkj = B[k+j*n];
             const double* a_col = A + k*n;
             for (int i = i0; i < i_end; ++i)
               c_col[i] += a_col[i] * bkj;
           }
         }
       }
     }
   }
 }
--- a/Project1/project_1_maggioni_claudio/matmult/dgemm-naive.c
+++ b/Project1/project_1_maggioni_claudio/matmult/dgemm-naive.c
@ -0,0 +1,35 @@
 /* 
    Please include compiler name below (you may also include any other modules you would like to be loaded)
 COMPILER= gnu
    Please include All compiler flags and libraries as you want them run. You can simply copy this over from the Makefile's first few lines
 CC = cc
 OPT = -O3
 CFLAGS = -Wall -std=gnu99 $(OPT)
 MKLROOT = /opt/intel/composer_xe_2013.1.117/mkl
 LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread -lm
 */
 const char* dgemm_desc = "Naive, three-loop dgemm.";
 /* square_dgemm, plain three-loop variant.
  * Performs C := C + A * B, where A, B, and C are n-by-n matrices stored in
  * column-major format. On exit, A and B maintain their input values.
  * Each C(i,j) is read once, accumulates A(i,k) * B(k,j) for k = 0..n-1 in a
  * local, and is written back once. */
 void square_dgemm (int n, double* A, double* B, double* C)
 {
   for (int row = 0; row < n; ++row)
   {
     for (int col = 0; col < n; ++col)
     {
       /* Start of column `col` in B, and the target element of C */
       const double* b_col = B + col*n;
       double* target = C + col*n + row;
       double acc = *target;
       int k = 0;
       while (k < n)
       {
         acc += A[row + k*n] * b_col[k];
         ++k;
       }
       *target = acc;
     }
   }
 }
--- a/Project1/project_1_maggioni_claudio/matmult/run_matrixmult.sh
+++ b/Project1/project_1_maggioni_claudio/matmult/run_matrixmult.sh
@ -0,0 +1,28 @@
 #!/bin/bash -l
 #SBATCH --job-name=matrixmult
 #SBATCH --time=00:30:00
 #SBATCH --nodes=1
 #SBATCH --output=matrixmult-%j.out
 #SBATCH --error=matrixmult-%j.err
 # Runs the three dgemm benchmark executables, saving the output of each one to
 # its own .data file, and then lets gnuplot process timing.gp.
 # Load the modules only where a `module` command is available.
 if command -v module >/dev/null 2>&1; then
   module load gcc/10.1.0 intel-mkl/2020.1.217-gcc-10.1.0-qsctnr6 gnuplot
 fi
 # One thread for both OpenMP and MKL.
 export OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
 # run_benchmark BANNER EXECUTABLE DATAFILE
 # Prints BANNER, runs ./EXECUTABLE while copying its output to DATAFILE,
 # then prints an empty line.
 run_benchmark() {
   echo "$1"
   "./$2" | tee "$3"
   echo
 }
 run_benchmark "==== benchmark-naive ======================" benchmark-naive timing_basic_dgemm.data
 run_benchmark "==== benchmark-blas =======================" benchmark-blas timing_blas_dgemm.data
 run_benchmark "==== benchmark-blocked ====================" benchmark-blocked timing_blocked_dgemm.data
 echo "==== plot results ========================="
 gnuplot timing.gp
--- a/Project1/project_1_maggioni_claudio/matmult/timing.gp
+++ b/Project1/project_1_maggioni_claudio/matmult/timing.gp
@ -0,0 +1,20 @@
 # Plots the three timing_*_dgemm.data files written by run_matrixmult.sh into
 # timing.ps, with one curve per dgemm variant.
 # NOTE(review): the title names a specific CPU model and clock - confirm it
 # matches the machine the benchmarks were run on.
 set title "NxN matrix-matrix-multiplication on 4-Core Intel(R) Xeon(R) CPU E3-1585L v5 @ 3.00GHz"
 set xlabel "Matrix size (N)"
 # Column 4 of a benchmark output line is the number printed after "Mflop/s:",
 # so the axis unit is MFlop/s.
 set ylabel "Performance (MFlop/s)"
 set grid
 set logscale y 10
 set terminal postscript color "Helvetica" 14
 set output "timing.ps"
 # set terminal png color "Helvetica" 14
 # set output "timing.png"
 # plot "timing.data" using 2:4 title "square_dgemm" with linespoints
 # For performance comparisons: column 2 is the matrix size, column 4 the rate
 plot "timing_basic_dgemm.data" using 2:4 title "Naive dgemm" with linespoints, \
      "timing_blocked_dgemm.data" using 2:4 title "Blocked dgemm" with linespoints, \
      "timing_blas_dgemm.data" using 2:4 title "MKL blas dgemm" with linespoints
--- a/Project1/project_1_maggioni_claudio/membench/Makefile
+++ b/Project1/project_1_maggioni_claudio/membench/Makefile
@ -0,0 +1,30 @@
 #
 # Usage:
 #	make		# run benchmark on the local machine or on cluster compute node
 #			# operated by SLURM
 #
 # Do not let make delete generated plot scripts, data files or job output.
 .PRECIOUS:	%.gp %.xxx %.out
 # generic, clean and tar are commands, not files.
 .PHONY: generic clean tar
 #generic: generic.ps
 generic: membench
 # Build the benchmark executable.
 membench: membench.c
 	gcc -O3 -o membench membench.c
 # $(MAKE) (rather than a literal `make`) forwards flags such as -n and -j.
 clean:
 	$(MAKE) generic.clean
 # Submit the benchmark job for stem $*, then plot with the script $*.gp.
 # `module load` and gnuplot share one recipe line because every recipe line
 # runs in its own shell; the load is skipped where `module` does not exist.
 # NOTE(review): sbatch usually returns as soon as the job is queued, so the
 # data file may not exist yet when gnuplot starts - confirm.
 %.ps: membench %.gp
 	sbatch ./run_membench.sh $*
 	if command -v module >/dev/null 2>&1; then module load gnuplot; fi && gnuplot $*.gp
 # Instantiate the plot script by substituting the stem for the placeholder
 # word in gnuplot.template.
 %.gp: gnuplot.template
 	sed -e '/sarlacc/	s//$*/' gnuplot.template > $*.gp
 # Remove the files generated for stem $*, all *.out files and the executable.
 %.clean:
 	rm -f $*.ps $*.gp $*.xxx *.out membench
 # Archive this directory into ../membench.tar; tar runs only if cd succeeds.
 tar:
 	cd .. && tar cf membench.tar membench/*
--- a/Project1/project_1_maggioni_claudio/membench/generic_macos.gp
+++ b/Project1/project_1_maggioni_claudio/membench/generic_macos.gp
@ -0,0 +1,35 @@
 # Plots the membench results: one curve per data block (index 0..17), with
 # column 2 on the x axis and column 3 on the y axis.
 # NOTE(review): this script reads generic.xxx and writes generic.ps, while the
 # files committed next to it are named generic_macos.* - confirm which names
 # are intended.
 set terminal postscript color
 set output "generic.ps"
 set style data linespoints
 set style line 1 linetype 2
 set style line 2 linetype 3
 set style line 3 linetype 1
 set logscale x 2
 set xtics (4,16,64,256,"1K" 1024,"4K" 4096,"16K" 16384,"64K" 65536,"256K" 262144,"1M" 1048576)
 set title "10-Core Intel(R) Xeon(R) CPU E3-1585L v5 @ 3.00GHz Read+Write (ns) Versus Stride"
 set xlabel "Stride (bytes)"
 set ylabel "Time Read+Write (nanoseconds)"
 # Show the legend.
 set key on
 plot	'generic.xxx' index 0 using 2:3 title "0.5 KB" with linespoints, \
 	'generic.xxx' index 1 using 2:3 title "1 KB" with linespoints, \
 	'generic.xxx' index 2 using 2:3 title "2 KB" with linespoints, \
 	'generic.xxx' index 3 using 2:3 title "4 KB" with linespoints, \
 	'generic.xxx' index 4 using 2:3 title "8 KB" with linespoints, \
 	'generic.xxx' index 5 using 2:3 title "16 KB" with linespoints, \
 	'generic.xxx' index 6 using 2:3 title "32 KB" with linespoints, \
 	'generic.xxx' index 7 using 2:3 title "64 KB" with linespoints, \
 	'generic.xxx' index 8 using 2:3 title "128 KB" with linespoints, \
 	'generic.xxx' index 9 using 2:3 title "256 KB" with linespoints, \
 	'generic.xxx' index 10 using 2:3 title "512 KB" with linespoints, \
 	'generic.xxx' index 11 using 2:3 title "1 MB" with linespoints, \
 	'generic.xxx' index 12 using 2:3 title "2 MB" with linespoints, \
 	'generic.xxx' index 13 using 2:3 title "4 MB" with linespoints, \
 	'generic.xxx' index 14 using 2:3 title "8 MB" with linespoints, \
 	'generic.xxx' index 15 using 2:3 title "16 MB" with linespoints, \
 	'generic.xxx' index 16 using 2:3 title "32 MB" with linespoints, \
 	'generic.xxx' index 17 using 2:3 title "64 MB" with linespoints
--- a/Project1/project_1_maggioni_claudio/membench/generic_macos.ps
+++ b/Project1/project_1_maggioni_claudio/membench/generic_macos.ps
--- a/Project1/project_1_maggioni_claudio/membench/generic_macos.xxx
+++ b/Project1/project_1_maggioni_claudio/membench/generic_macos.xxx
@ -0,0 +1,315 @@
 512	4	0.422
 512	8	0.439
 512	16	0.403
 512	32	0.403
 512	64	0.398
 512	128	0.456
 512	256	0.726
 1024	4	0.410
 1024	8	0.420
 1024	16	0.420
 1024	32	0.407
 1024	64	0.400
 1024	128	0.391
 1024	256	0.454
 1024	512	0.728
 2048	4	0.404
 2048	8	0.408
 2048	16	0.431
 2048	32	0.419
 2048	64	0.398
 2048	128	0.438
 2048	256	0.395
 2048	512	0.464
 2048	1024	0.724
 4096	4	0.402
 4096	8	0.430
 4096	16	0.412
 4096	32	0.427
 4096	64	0.445
 4096	128	0.404
 4096	256	0.396
 4096	512	0.402
 4096	1024	0.461
 4096	2048	0.714
 8192	4	0.402
 8192	8	0.403
 8192	16	0.404
 8192	32	0.431
 8192	64	0.453
 8192	128	0.469
 8192	256	0.404
 8192	512	0.428
 8192	1024	0.406
 8192	2048	0.491
 8192	4096	0.753
 16384	4	0.417
 16384	8	0.403
 16384	16	0.441
 16384	32	0.441
 16384	64	0.443
 16384	128	0.505
 16384	256	0.498
 16384	512	0.420
 16384	1024	0.523
 16384	2048	0.631
 16384	4096	0.605
 16384	8192	0.705
 32768	4	0.425
 32768	8	0.412
 32768	16	0.403
 32768	32	0.403
 32768	64	0.406
 32768	128	0.406
 32768	256	0.427
 32768	512	0.463
 32768	1024	0.505
 32768	2048	0.670
 32768	4096	0.918
 32768	8192	0.581
 32768	16384	0.702
 65536	4	0.401
 65536	8	0.403
 65536	16	0.447
 65536	32	0.466
 65536	64	0.925
 65536	128	1.306
 65536	256	1.335
 65536	512	1.885
 65536	1024	2.523
 65536	2048	2.266
 65536	4096	3.132
 65536	8192	0.913
 65536	16384	0.582
 65536	32768	0.701
 131072	4	0.415
 131072	8	0.403
 131072	16	0.425
 131072	32	0.456
 131072	64	0.904
 131072	128	1.307
 131072	256	1.334
 131072	512	1.848
 131072	1024	2.393
 131072	2048	2.863
 131072	4096	3.742
 131072	8192	3.120
 131072	16384	0.887
 131072	32768	0.581
 131072	65536	0.701
 262144	4	0.438
 262144	8	0.440
 262144	16	0.472
 262144	32	0.518
 262144	64	1.014
 262144	128	1.698
 262144	256	1.630
 262144	512	2.226
 262144	1024	2.609
 262144	2048	3.078
 262144	4096	3.927
 262144	8192	3.755
 262144	16384	3.272
 262144	32768	0.948
 262144	65536	0.622
 262144	131072	0.701
 524288	4	0.405
 524288	8	0.431
 524288	16	0.481
 524288	32	0.818
 524288	64	1.572
 524288	128	2.656
 524288	256	2.957
 524288	512	3.704
 524288	1024	3.985
 524288	2048	4.254
 524288	4096	4.515
 524288	8192	4.148
 524288	16384	3.985
 524288	32768	3.220
 524288	65536	0.982
 524288	131072	0.697
 524288	262144	0.767
 1048576	4	0.404
 1048576	8	0.418
 1048576	16	0.546
 1048576	32	0.800
 1048576	64	1.499
 1048576	128	2.615
 1048576	256	3.033
 1048576	512	3.902
 1048576	1024	4.197
 1048576	2048	4.332
 1048576	4096	4.264
 1048576	8192	4.419
 1048576	16384	4.799
 1048576	32768	5.757
 1048576	65536	5.498
 1048576	131072	0.957
 1048576	262144	0.635
 1048576	524288	0.704
 2097152	4	0.635
 2097152	8	0.647
 2097152	16	0.556
 2097152	32	0.954
 2097152	64	1.703
 2097152	128	2.897
 2097152	256	3.222
 2097152	512	4.473
 2097152	1024	4.570
 2097152	2048	4.091
 2097152	4096	4.077
 2097152	8192	4.546
 2097152	16384	5.193
 2097152	32768	5.117
 2097152	65536	4.991
 2097152	131072	4.379
 2097152	262144	1.031
 2097152	524288	0.673
 2097152	1048576	0.726
 4194304	4	0.445
 4194304	8	0.636
 4194304	16	0.848
 4194304	32	1.132
 4194304	64	1.639
 4194304	128	3.468
 4194304	256	3.918
 4194304	512	4.942
 4194304	1024	4.904
 4194304	2048	4.221
 4194304	4096	4.554
 4194304	8192	5.309
 4194304	16384	5.732
 4194304	32768	5.519
 4194304	65536	5.235
 4194304	131072	4.912
 4194304	262144	4.715
 4194304	524288	1.013
 4194304	1048576	0.594
 4194304	2097152	0.740
 8388608	4	0.549
 8388608	8	0.755
 8388608	16	1.192
 8388608	32	1.612
 8388608	64	3.242
 8388608	128	5.062
 8388608	256	4.817
 8388608	512	4.999
 8388608	1024	6.507
 8388608	2048	7.264
 8388608	4096	5.980
 8388608	8192	4.671
 8388608	16384	4.947
 8388608	32768	5.228
 8388608	65536	5.524
 8388608	131072	5.782
 8388608	262144	4.873
 8388608	524288	3.119
 8388608	1048576	0.965
 8388608	2097152	0.580
 8388608	4194304	0.702
 16777216	4	0.581
 16777216	8	0.790
 16777216	16	1.447
 16777216	32	2.226
 16777216	64	5.026
 16777216	128	7.217
 16777216	256	11.799
 16777216	512	10.725
 16777216	1024	13.057
 16777216	2048	14.465
 16777216	4096	14.082
 16777216	8192	8.786
 16777216	16384	5.607
 16777216	32768	5.368
 16777216	65536	5.643
 16777216	131072	5.823
 16777216	262144	5.693
 16777216	524288	4.243
 16777216	1048576	3.339
 16777216	2097152	1.109
 16777216	4194304	0.579
 16777216	8388608	0.702
 33554432	4	0.540
 33554432	8	0.781
 33554432	16	1.266
 33554432	32	2.442
 33554432	64	5.082
 33554432	128	8.243
 33554432	256	12.508
 33554432	512	12.972
 33554432	1024	17.293
 33554432	2048	20.517
 33554432	4096	15.132
 33554432	8192	15.801
 33554432	16384	9.443
 33554432	32768	7.183
 33554432	65536	7.054
 33554432	131072	6.064
 33554432	262144	5.443
 33554432	524288	5.409
 33554432	1048576	4.329
 33554432	2097152	3.734
 33554432	4194304	0.956
 33554432	8388608	0.579
 33554432	16777216	0.699
 67108864	4	0.539
 67108864	8	0.794
 67108864	16	1.373
 67108864	32	2.531
 67108864	64	5.398
 67108864	128	9.175
 67108864	256	14.549
 67108864	512	17.047
 67108864	1024	19.841
 67108864	2048	21.698
 67108864	4096	15.342
 67108864	8192	14.757
 67108864	16384	14.683
 67108864	32768	11.404
 67108864	65536	9.460
 67108864	131072	6.333
 67108864	262144	5.737
 67108864	524288	5.177
 67108864	1048576	4.878
 67108864	2097152	4.219
 67108864	4194304	3.114
 67108864	8388608	0.967
 67108864	16777216	0.638
 67108864	33554432	0.707
--- a/Project1/project_1_maggioni_claudio/membench/gnuplot.template
+++ b/Project1/project_1_maggioni_claudio/membench/gnuplot.template
@ -0,0 +1,35 @@
 # Gnuplot template for the membench results: one curve per array size,
 # access time (column 3) plotted against stride in bytes (column 2).
 # run_membench.sh replaces "sarlacc" with "generic" on every line before
 # handing the script to gnuplot, so file names here are placeholders.
 set terminal postscript color
 set output "sarlacc.ps"
 set style data linespoints
 set style line 1 linetype 2
 set style line 2 linetype 3
 set style line 3 linetype 1
 # strides are powers of two, so use a base-2 logarithmic x axis
 set logscale x 2
 # explicit tics; extended past 1M because the largest arrays are measured
 # with strides of up to 32 MB
 set xtics (4,16,64,256,"1K" 1024,"4K" 4096,"16K" 16384,"64K" 65536,"256K" 262144,"1M" 1048576,"4M" 4194304,"16M" 16777216)
 # NOTE(review): the title hard-codes a CPU description; confirm it matches
 # the machine the data was collected on.
 set title "10-Core Intel(R) Xeon(R) CPU E3-1585L v5 @ 3.00GHz Read+Write (ns) Versus Stride"
 set xlabel "Stride (bytes)"
 set ylabel "Time Read+Write (nanoseconds)"
 # show the legend (the former `set nokey` was overridden by this line)
 set key on
 # membench separates array sizes by two blank lines, so `index N` selects
 # the N-th array size: index 0 is 512 bytes, each following index doubles.
 plot	'sarlacc.xxx' index 0 using 2:3 title "0.5 KB" with linespoints, \
 	'sarlacc.xxx' index 1 using 2:3 title "1 KB" with linespoints, \
 	'sarlacc.xxx' index 2 using 2:3 title "2 KB" with linespoints, \
 	'sarlacc.xxx' index 3 using 2:3 title "4 KB" with linespoints, \
 	'sarlacc.xxx' index 4 using 2:3 title "8 KB" with linespoints, \
 	'sarlacc.xxx' index 5 using 2:3 title "16 KB" with linespoints, \
 	'sarlacc.xxx' index 6 using 2:3 title "32 KB" with linespoints, \
 	'sarlacc.xxx' index 7 using 2:3 title "64 KB" with linespoints, \
 	'sarlacc.xxx' index 8 using 2:3 title "128 KB" with linespoints, \
 	'sarlacc.xxx' index 9 using 2:3 title "256 KB" with linespoints, \
 	'sarlacc.xxx' index 10 using 2:3 title "512 KB" with linespoints, \
 	'sarlacc.xxx' index 11 using 2:3 title "1 MB" with linespoints, \
 	'sarlacc.xxx' index 12 using 2:3 title "2 MB" with linespoints, \
 	'sarlacc.xxx' index 13 using 2:3 title "4 MB" with linespoints, \
 	'sarlacc.xxx' index 14 using 2:3 title "8 MB" with linespoints, \
 	'sarlacc.xxx' index 15 using 2:3 title "16 MB" with linespoints, \
 	'sarlacc.xxx' index 16 using 2:3 title "32 MB" with linespoints, \
 	'sarlacc.xxx' index 17 using 2:3 title "64 MB" with linespoints
--- a/Project1/project_1_maggioni_claudio/membench/membench.c
+++ b/Project1/project_1_maggioni_claudio/membench/membench.c
@ -0,0 +1,168 @@
 /* ==================================================================== *
 *                                                                      *
 *  membench.c --   Measurement of the performance of the memory        *
 *                  hierarchy.                                          *
 *                                                                      *
 * ==================================================================== */
 #include <unistd.h>
 #include <stdio.h>
 #include <sys/resource.h>
 #include <sys/times.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <time.h>
 #include <limits.h>
 /* Array sizes below are counts of int elements, not bytes: main() sweeps
    csize from CACHE_MIN to CACHE_MAX and reports csize * sizeof (int). */
 #define CACHE_MIN (128)         /* smallest array size swept, in ints */
 #define CACHE_MAX (16 * 1024 * 1024) /* largest array size swept, in ints */
 #define	SAMPLE	10              /* each timed sample runs SAMPLE * stride passes, to get larger time sample */
 int x[CACHE_MAX];               /* stride thru this array; sized for the largest sweep */
 /**
 * Read the processor's tick counter with the x86 RDTSC instruction.
 *
 * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX; the two
 * halves are combined into one 64-bit value. Requires an x86/x86-64 target
 * and a compiler that understands GCC-style inline assembly.
 *
 * Declared `static inline`: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler does not inline (e.g. when
 * building without optimization) would fail at link time.
 *
 * @return The current 64-bit tick count.
 */
 static inline unsigned long long getCPUTick (void)
 {
    unsigned lo, hi;
    asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    /* high half goes into bits 63..32, low half into bits 31..0 */
    return ((unsigned long long) hi << 32) | lo;
 }
 /**
 * Wall-clock time in milliseconds, as reported by gettimeofday().
 *
 * @return The seconds field scaled to milliseconds plus the microseconds
 *         field truncated to whole milliseconds.
 */
 unsigned long timeGetTime (void)
 {
    struct timeval now;
    unsigned long millis;
    /* second argument (time zone) is not used */
    gettimeofday (&now, 0);
    millis = now.tv_sec * 1000 + now.tv_usec / 1000;
    return millis;
 }
 /**
 * Determine the CPU clock speed.
 *
 * Busy-waits on timeGetTime() until the millisecond value changes, starts
 * counting ticks, busy-waits until more than nTime milliseconds have passed
 * and then scales the tick difference by the measured interval.
 *
 * @param nTime The time in milliseconds used to perform the measurement
 * @return The estimated tick rate in Hertz (ticks per second). If the
 *         measured interval is not positive (only possible for a negative
 *         nTime) the raw tick difference is returned instead.
 */
 unsigned long getCPUSpeed (long nTime)
 {
    long long timeStart, timeStop, elapsedMs;
    long long startTick, endTick, ticks;
    long long firstRead, secondRead, overhead;
    /* Cost of two back-to-back counter reads. The reads are separate
       statements because the order in which the operands of
       `getCPUTick () - getCPUTick ()` are evaluated is unspecified in C. */
    firstRead = getCPUTick ();
    secondRead = getCPUTick ();
    overhead = secondRead - firstRead;
    /* Calculate Starting Time And Start Tick */
    /* spin until two consecutive clock readings differ */
    timeStart = timeGetTime ();
    while (timeGetTime () == timeStart)
        timeStart = timeGetTime ();
    while (1)
    {
        timeStop = timeGetTime ();
        if ((timeStop - timeStart) > 1)
        {
            startTick = getCPUTick ();
            break;
        }
    }
    /* Calculate Stop Time And End Tick */
    timeStart = timeStop;
    while (1)
    {
        timeStop = timeGetTime ();
        if ((timeStop - timeStart) > nTime)
        {
            endTick = getCPUTick ();
            break;
        }
    }
    /* Return The Processors Speed In Hertz */
    ticks = (endTick - startTick) - overhead;
    /* the loop above exits only once MORE than nTime ms have passed, so
       divide by the interval actually measured rather than by nTime */
    elapsedMs = timeStop - timeStart;
    if (elapsedMs <= 0)
        return (unsigned long) ticks;
    return (unsigned long) (ticks * 1000 / elapsedMs);
 }
 /**
 * Run the memory benchmark.
 *
 * For every array size csize (CACHE_MIN .. CACHE_MAX ints, doubling) and
 * every stride (1 .. csize / 2 ints, doubling) the array x is swept with a
 * read-modify-write access until the accumulated tick count reaches nHz,
 * i.e. roughly one second. The same number of samples is then repeated with
 * a dummy loop body and its ticks are subtracted from `cycles`.
 *
 * One result line is printed per (size, stride) pair and two blank lines
 * after each array size; run_membench.sh parses this format, so the format
 * string must stay as it is.
 *
 * @return 0 always.
 */
 int main (void)
 {
    register int i, index, stride, limit;
    /* sink of the dummy loop: starts at zero so no indeterminate value is
       read, and is unsigned so the accumulation cannot overflow a signed int */
    register unsigned int temp = 0;
    long steps, tsteps;
    int csize;
    /* timing variables */
    double sec;
    /* number of processor cycles used */
    unsigned long long cycles0, cycles;
    /* The CPU speed in Hz */
    unsigned long nHz = getCPUSpeed (1000);
    for (csize = CACHE_MIN; csize <= CACHE_MAX; csize <<= 1)
    {
        for (stride = 1; stride <= csize / 2; stride <<= 1)
        {
            /* init cycles counter */
            cycles = 0;
            /* cache size this loop */
            limit = csize - stride + 1;
            steps = 0;
            do
            {
                cycles0 = getCPUTick ();
                for (i = SAMPLE * stride; i != 0; i--)
                {
                    /* larger sample */
                    for (index = 0; index < limit; index += stride)
                    {
                        /* cache access */
                        x[index] = x[index] + 1;
                    }
                }
                /* count while loop iterations */
                steps++;
                cycles += getCPUTick () - cycles0;
            } while (cycles < nHz); /* repeat until collected 1 sec */
            /* NOTE(review): sec is taken BEFORE the overhead loop below, so
               the reported ns and sec values do not have the loop overhead
               subtracted; only the printed `cycles` value does. */
            sec = cycles / (double) nHz;
            /* repeat empty loop to subtract loop overhead */
            /* used to match # while iterations */
            tsteps = 0;
            /* repeat until same # iterations as above */
            do
            {
                cycles0 = getCPUTick ();
                for (i = SAMPLE * stride; i != 0; i--)
                {
                    /* larger sample */
                    for (index = 0; index < limit; index += stride)
                    {
                        /* dummy code */
                        temp = temp + index;
                    }
                }
                /* count while loop iterations */
                tsteps++;
                cycles -= getCPUTick () - cycles0;
            } while (tsteps < steps);
            /* casts make each argument match its conversion specifier
               (size_t vs %lu, unsigned long long vs %lld) without touching
               the output format; (limit - 1) / stride + 1 is the number of
               accesses in one pass over the array */
            printf ("Size:%7lu Stride:%7lu read+write:%10.3f ns, sec = %6.3f, cycles = %lld steps = %6.0f\n",
                (unsigned long) (csize * sizeof (int)),
                (unsigned long) (stride * sizeof (int)),
                (double) sec * 1e9 / (steps * SAMPLE * stride * ((limit - 1) / stride + 1)),
                sec, (long long) cycles, (double) steps);
            fflush (stdout);
        }
        /* two blank lines end the data set of this array size */
        printf ("\n\n");
    }
    return 0;
 }
--- a/Project1/project_1_maggioni_claudio/membench/run_membench.sh
+++ b/Project1/project_1_maggioni_claudio/membench/run_membench.sh
@ -0,0 +1,16 @@
 #!/bin/bash -l
 #SBATCH --job-name=membench
 #SBATCH --time=00:30:00
 #SBATCH --nodes=1
 #SBATCH --output=membench-%j.out
 #SBATCH --error=membench-%j.err
 # Batch job: runs ./membench from the current directory, converts its output
 # into the data file generic.xxx, derives generic.gp from gnuplot.template
 # and plots it with gnuplot.
 # load modules (skipped when no `module` command is available)
 if command -v module 1>/dev/null 2>&1; then
   module load gcc/10.1.0 gnuplot
 fi
 # 1st sed: put a space after every ':'; 2nd sed: turn each run of spaces into
 # one tab; cut then keeps tab-separated fields 2, 4 and 6 (size, stride, ns).
 # Only if that pipeline succeeds is generic.gp produced, by replacing
 # "sarlacc" with "generic" on each line of the template.
 ./membench | sed -e '/:/	s//: /g' -e '/  */	s//	/g' | cut -f2,4,6 > generic.xxx && sed -e '/sarlacc/ s//generic/' gnuplot.template > generic.gp
 # render the plot described by generic.gp
 gnuplot generic.gp
--- a/Project1/usi_inf.png
+++ b/Project1/usi_inf.png