hw2: added sources

This commit is contained in:
Claudio Maggioni 2022-10-18 14:39:50 +02:00
parent 40b14bf138
commit 4a3a71a537
24 changed files with 820 additions and 0 deletions

View file

@ -0,0 +1,22 @@
CFLAGS = -g -O3 -fopenmp
all: omp_bug1 omp_bug2 omp_bug3 omp_bug4 omp_bug5
omp_bug1: omp_bug1.c
gcc $(CFLAGS) $< -o $@
omp_bug2: omp_bug2.c
gcc $(CFLAGS) $< -o $@
omp_bug3: omp_bug3.c
gcc $(CFLAGS) $< -o $@
omp_bug4: omp_bug4.c
gcc $(CFLAGS) $< -o $@
omp_bug5: omp_bug5.c
gcc $(CFLAGS) $< -o $@
rm -rf omp_bug1 omp_bug2 omp_bug3 omp_bug4 omp_bug5

View file

@ -0,0 +1,32 @@
* FILE: omp_bug1fix.c
* This example attempts to show use of the parallel for construct. However
* it will generate errors at compile time. Try to determine what is causing
* the error.
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define N 50
#define CHUNKSIZE 5
int main(int argc, char *argv[]) {
int i, chunk, tid;
float a[N], b[N], c[N];
/* Some initializations */
for (i = 0; i < N; i++)
a[i] = b[i] = i * 1.0;
chunk = CHUNKSIZE;
#pragma omp parallel for shared(a, b, c, chunk) private(i, tid) \
schedule(static, chunk)
tid = omp_get_thread_num();
for (i = 0; i < N; i++) {
c[i] = a[i] + b[i];
printf("tid= %d i= %d c[i]= %f\n", tid, i, c[i]);
} /* end of parallel for construct */

View file

@ -0,0 +1,36 @@
/* FILE: omp_bug2.c
* Another OpenMP program with a bug.
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
int nthreads, i, tid;
float total;
/*** Spawn parallel region ***/
#pragma omp parallel
/* Obtain thread number */
tid = omp_get_thread_num();
/* Only master thread does this */
if (tid == 0) {
nthreads = omp_get_num_threads();
printf("Number of threads = %d\n", nthreads);
printf("Thread %d is starting...\n", tid);
#pragma omp barrier
/* do some work */
total = 0.0;
#pragma omp for schedule(dynamic, 10)
for (i = 0; i < 1000000; i++)
total = total + i * 1.0;
printf("Thread %d is done! Total= %e\n", tid, total);
} /*** End of parallel region ***/

View file

@ -0,0 +1,81 @@
* FILE: omp_bug3.c
* Run time error
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define N 50
int main(int argc, char *argv[]) {
int i, nthreads, tid, section;
float a[N], b[N], c[N];
void print_results(float array[N], int tid, int section);
/* Some initializations */
for (i = 0; i < N; i++)
a[i] = b[i] = i * 1.0;
#pragma omp parallel private(c, i, tid, section)
tid = omp_get_thread_num();
if (tid == 0) {
nthreads = omp_get_num_threads();
printf("Number of threads = %d\n", nthreads);
/*** Use barriers for clean output ***/
#pragma omp barrier
printf("Thread %d starting...\n", tid);
#pragma omp barrier
#pragma omp sections nowait
#pragma omp section
section = 1;
for (i = 0; i < N; i++)
c[i] = a[i] * b[i];
print_results(c, tid, section);
#pragma omp section
section = 2;
for (i = 0; i < N; i++)
c[i] = a[i] + b[i];
print_results(c, tid, section);
} /* end of sections */
/*** Use barrier for clean output ***/
#pragma omp barrier
printf("Thread %d exiting...\n", tid);
} /* end of parallel section */
void print_results(float array[N], int tid, int section) {
int i, j;
j = 1;
/*** use critical for clean output ***/
#pragma omp critical
printf("\nThread %d did section %d. The results are:\n", tid, section);
for (i = 0; i < N; i++) {
printf("%e ", array[i]);
if (j == 6) {
j = 1;
} /*** end of critical ***/
#pragma omp barrier
printf("Thread %d done and synchronized.\n", tid);

View file

@ -0,0 +1,36 @@
* FILE: omp_bug4.c
* This very simple program causes a segmentation fault.
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define N 1048
int main(int argc, char *argv[]) {
int nthreads, tid, i, j;
double a[N][N];
/* Fork a team of threads with explicit variable scoping */
#pragma omp parallel shared(nthreads) private(i, j, tid, a)
/* Obtain/print thread info */
tid = omp_get_thread_num();
if (tid == 0) {
nthreads = omp_get_num_threads();
printf("Number of threads = %d\n", nthreads);
printf("Thread %d starting...\n", tid);
/* Each thread works on its own private copy of the array */
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
a[i][j] = tid + i + j;
/* For confirmation */
printf("Thread %d done. Last element= %f\n", tid, a[N - 1][N - 1]);
} /* All threads join master thread and disband */

View file

@ -0,0 +1,68 @@
* FILE: omp_bug5.c
* Using SECTIONS, two threads initialize their own array and then add
* it to the other's array, however a deadlock occurs.
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define N 1000000
#define PI 3.1415926535
#define DELTA .01415926535
int main(int argc, char *argv[]) {
int nthreads, tid, i;
float a[N], b[N];
omp_lock_t locka, lockb;
/* Initialize the locks */
/* Fork a team of threads giving them their own copies of variables */
#pragma omp parallel shared(a, b, nthreads, locka, lockb) private(tid)
/* Obtain thread number and number of threads */
tid = omp_get_thread_num();
#pragma omp master
nthreads = omp_get_num_threads();
printf("Number of threads = %d\n", nthreads);
printf("Thread %d starting...\n", tid);
#pragma omp barrier
#pragma omp sections nowait
#pragma omp section
printf("Thread %d initializing a[]\n", tid);
for (i = 0; i < N; i++)
a[i] = i * DELTA;
printf("Thread %d adding a[] to b[]\n", tid);
for (i = 0; i < N; i++)
b[i] += a[i];
#pragma omp section
printf("Thread %d initializing b[]\n", tid);
for (i = 0; i < N; i++)
b[i] = i * PI;
printf("Thread %d adding b[] to a[]\n", tid);
for (i = 0; i < N; i++)
a[i] += b[i];
} /* end of sections */
} /* end of parallel region */

View file

@ -0,0 +1,83 @@
// #include <omp.h>
#include "walltime.h"
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <unistd.h>
#define NUM_ITERATIONS 100
// Example benchmarks
// 0.008s ~0.8MB
#define N 100000
// 0.1s ~8MB
// #define N 1000000
// 1.1s ~80MB
// #define N 10000000
// 13s ~800MB
// #define N 100000000
// 127s 16GB
//#define N 1000000000
#define EPSILON 0.1
using namespace std;
int main() {
double time_serial, time_start = 0.0;
double *a, *b;
// Allocate memory for the vectors as 1-D arrays
a = new double[N];
b = new double[N];
// Initialize the vectors with some values
for (int i = 0; i < N; i++) {
a[i] = i;
b[i] = i / 10.0;
long double alpha = 0;
// serial execution
// Note that we do extra iterations to reduce relative timing overhead
time_start = wall_time();
for (int iterations = 0; iterations < NUM_ITERATIONS; iterations++) {
alpha = 0.0;
for (int i = 0; i < N; i++) {
alpha += a[i] * b[i];
time_serial = wall_time() - time_start;
cout << "Serial execution time = " << time_serial << " sec" << endl;
long double alpha_parallel = 0;
double time_red = 0;
double time_critical = 0;
// TODO: Write parallel version (2 ways!)
// i. Using reduction pragma
// ii. Using critical pragma
for (int iterations = 0; iterations < NUM_ITERATIONS; iterations++) {
alpha_parallel = 0.0;
for (int i = 0; i < N; i++) {
alpha_parallel += a[i] * b[i];
if ((fabs(alpha_parallel - alpha) / fabs(alpha_parallel)) > EPSILON) {
cout << "parallel reduction: " << alpha_parallel << ", serial: " << alpha
<< "\n";
cerr << "Alpha not yet implemented correctly!\n";
cout << "Parallel dot product = " << alpha_parallel
<< " time using reduction method = " << time_red
<< " sec, time using critical method " << time_critical << " sec"
<< endl;
// De-allocate memory
delete[] a;
delete[] b;
return 0;

View file

@ -0,0 +1,7 @@
all: dotProduct
dotProduct: dotProduct.cpp walltime.h
g++ -O3 -fopenmp $< -o $@
rm -rf dotProduct

View file

@ -0,0 +1,24 @@
#include <sys/time.h> // For struct timeval, gettimeofday
#include <time.h> // For struct timespec, clock_gettime, CLOCK_MONOTONIC
#include <stdio.h>
#include <stdlib.h>
double wall_time() {
struct timeval t;
gettimeofday(&t, NULL);
return 1. * t.tv_sec + 1.e-6 * t.tv_usec;
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
return 1. * t.tv_sec + 1.e-9 * t.tv_nsec;
void die(const char *message) {

View file

@ -0,0 +1,56 @@
#include "walltime.h"
#include <iostream>
#include <random>
#define VEC_SIZE 1000000000
#define BINS 16
using namespace std;
int main() {
double time_start, time_end;
// Initialize random number generator
unsigned int seed = 123;
float mean = BINS / 2.0;
float sigma = BINS / 12.0;
std::default_random_engine generator(seed);
std::normal_distribution<float> distribution(mean, sigma);
// Generate random sequence
// Note: normal distribution is on interval [-inf; inf]
// we want [0; BINS-1]
int *vec = new int[VEC_SIZE];
for (long i = 0; i < VEC_SIZE; ++i) {
vec[i] = int(distribution(generator));
if (vec[i] < 0)
vec[i] = 0;
if (vec[i] > BINS - 1)
vec[i] = BINS - 1;
// Initialize histogram
// Set all bins to zero
long dist[BINS];
for (int i = 0; i < BINS; ++i) {
dist[i] = 0;
time_start = wall_time();
// TODO Parallelize the histogram computation
for (long i = 0; i < VEC_SIZE; ++i) {
time_end = wall_time();
// Write results
for (int i = 0; i < BINS; ++i) {
cout << "dist[" << i << "]=" << dist[i] << endl;
cout << "Time: " << time_end - time_start << " sec" << endl;
delete[] vec;
return 0;

View file

@ -0,0 +1,55 @@
#include "walltime.h"
#include <iostream>
#include <random>
#define VEC_SIZE 1000000000
#define BINS 16
using namespace std;
int main() {
double time_start, time_end;
// Initialize random number generator
unsigned int seed = 123;
float mean = BINS / 2.0;
float sigma = BINS / 12.0;
std::default_random_engine generator(seed);
std::normal_distribution<float> distribution(mean, sigma);
// Generate random sequence
// Note: normal distribution is on interval [-inf; inf]
// we want [0; BINS-1]
int *vec = new int[VEC_SIZE];
for (long i = 0; i < VEC_SIZE; ++i) {
vec[i] = int(distribution(generator));
if (vec[i] < 0)
vec[i] = 0;
if (vec[i] > BINS - 1)
vec[i] = BINS - 1;
// Initialize histogram
// Set all bins to zero
long dist[BINS];
for (int i = 0; i < BINS; ++i) {
dist[i] = 0;
time_start = wall_time();
// Compute histogram
for (long i = 0; i < VEC_SIZE; ++i) {
time_end = wall_time();
// Write results
for (int i = 0; i < BINS; ++i) {
cout << "dist[" << i << "]=" << dist[i] << endl;
cout << "Time: " << time_end - time_start << " sec" << endl;
delete[] vec;
return 0;

View file

@ -0,0 +1,11 @@
all: hist_seq hist_omp
hist_seq: hist_seq.cpp
g++ -O3 $^ -o $@
hist_omp: hist_omp.cpp
g++ -O3 -fopenmp $^ -o $@
rm -rf hist_seq hist_omp

View file

@ -0,0 +1,24 @@
#include <sys/time.h> // For struct timeval, gettimeofday
#include <time.h> // For struct timespec, clock_gettime, CLOCK_MONOTONIC
#include <stdio.h>
#include <stdlib.h>
double wall_time() {
struct timeval t;
gettimeofday(&t, NULL);
return 1. * t.tv_sec + 1.e-6 * t.tv_usec;
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
return 1. * t.tv_sec + 1.e-9 * t.tv_nsec;
void die(const char *message) {

View file

@ -0,0 +1,11 @@
all: recur_seq recur_omp
recur_seq: recur_seq.c
gcc -O3 $^ -o $@
recur_omp: recur_omp.c
gcc -O3 -fopenmp $^ -o $@ -lm
rm -rf recur_seq recur_omp

View file

@ -0,0 +1,34 @@
#include "walltime.h"
#include <math.h>
#include <stdlib.h>
int main(int argc, char *argv[]) {
int N = 2000000000;
double up = 1.00000001;
double Sn = 1.00000001;
int n;
/* allocate memory for the recursion */
double *opt = (double *)malloc((N + 1) * sizeof(double));
if (opt == NULL)
die("failed to allocate problem size");
double time_start = wall_time();
for (n = 0; n <= N; ++n) {
opt[n] = Sn;
Sn *= up;
printf("Parallel RunTime : %f seconds\n", wall_time() - time_start);
printf("Final Result Sn : %.17g \n", Sn);
double temp = 0.0;
for (n = 0; n <= N; ++n) {
temp += opt[n] * opt[n];
printf("Result ||opt||^2_2 : %f\n", temp / (double)N);
return 0;

View file

@ -0,0 +1,31 @@
#include "walltime.h"
#include <math.h>
int main(int argc, char *argv[]) {
int N = 2000000000;
double up = 1.00000001;
double Sn = 1.00000001;
int n;
/* allocate memory for the recursion */
double *opt = (double *)malloc((N + 1) * sizeof(double));
if (opt == NULL)
die("failed to allocate problem size");
double time_start = wall_time();
for (n = 0; n <= N; ++n) {
opt[n] = Sn;
Sn *= up;
printf("Sequential RunTime : %f seconds\n", wall_time() - time_start);
printf("Final Result Sn : %.17g \n", Sn);
double temp = 0.0;
for (n = 0; n <= N; ++n) {
temp += opt[n] * opt[n];
printf("Result ||opt||^2_2 : %f\n", temp / (double)N);
return 0;

View file

@ -0,0 +1,24 @@
#include <sys/time.h> // For struct timeval, gettimeofday
#include <time.h> // For struct timespec, clock_gettime, CLOCK_MONOTONIC
#include <stdio.h>
#include <stdlib.h>
double wall_time() {
struct timeval t;
gettimeofday(&t, NULL);
return 1. * t.tv_sec + 1.e-6 * t.tv_usec;
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
return 1. * t.tv_sec + 1.e-9 * t.tv_nsec;
void die(const char *message) {

View file

@ -0,0 +1,17 @@
#ifndef CONSTS_H_
#define CONSTS_H_

/* Maximum number of iterations computed for a single point. */
#define MAX_ITERS 35207

/* Size of the generated image, in pixels. */
#define IMAGE_WIDTH 4096
#define IMAGE_HEIGHT 4096

/*
 * Extent of the parameter plane:
 *   MIN_X + i*MIN_Y <= c < MAX_X + i*MAX_Y
 * The negative bounds are parenthesized so that each macro always expands to
 * a single operand, whatever expression it is pasted into.
 */
#define MIN_X (-2.1)
#define MAX_X 0.7
#define MIN_Y (-1.4)
#define MAX_Y 1.4

#endif /* CONSTS_H_ */

View file

@ -0,0 +1,8 @@
all: mandel_seq
mandel_seq: mandel_seq.c pngwriter.c
gcc -o $@ -I. -O3 $^ -lpng
rm -rf mandel_seq

View file

@ -0,0 +1,76 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include "consts.h"
#include "pngwriter.h"
/*
 * Return the current time of day in microseconds, as reported by
 * gettimeofday(): seconds * 1000000 + microseconds.
 *
 * Returns 0 if gettimeofday() fails, instead of reading an uninitialized
 * struct timeval. The arithmetic is carried out in unsigned long so that the
 * multiplication cannot overflow a signed time_t.
 */
unsigned long get_time(void) {
  struct timeval now;

  if (gettimeofday(&now, NULL) != 0) {
    return 0;
  }
  return (unsigned long)now.tv_sec * 1000000UL + (unsigned long)now.tv_usec;
}
int main(int argc, char **argv) {
png_data *pPng = png_create(IMAGE_WIDTH, IMAGE_HEIGHT);
double x, y, x2, y2, cx, cy;
cy = MIN_Y;
double fDeltaX = (MAX_X - MIN_X) / (double)IMAGE_WIDTH;
double fDeltaY = (MAX_Y - MIN_Y) / (double)IMAGE_HEIGHT;
long nTotalIterationsCount = 0;
unsigned long nTimeStart = get_time();
long i, j, n;
n = 0;
// do the calculation
for (j = 0; j < IMAGE_HEIGHT; j++) {
cx = MIN_X;
for (i = 0; i < IMAGE_WIDTH; i++) {
x = cx;
y = cy;
x2 = x * x;
y2 = y * y;
// compute the orbit z, f(z), f^2(z), f^3(z), ...
// count the iterations until the orbit leaves the circle |z|=2.
// stop if the number of iterations exceeds the bound MAX_ITERS.
// >>>>>>>> CODE IS MISSING
// <<<<<<<< CODE IS MISSING
// n indicates if the point belongs to the mandelbrot set
// plot the number of iterations at point (i, j)
int c = ((long)n * 255) / MAX_ITERS;
png_plot(pPng, i, j, c, c, c);
cx += fDeltaX;
cy += fDeltaY;
unsigned long nTimeEnd = get_time();
// print benchmark data
printf("Total time: %g millisconds\n",
(nTimeEnd - nTimeStart) / 1000.0);
printf("Image size: %ld x %ld = %ld Pixels\n",
printf("Total number of iterations: %ld\n", nTotalIterationsCount);
printf("Avg. time per pixel: %g microseconds\n",
(nTimeEnd - nTimeStart) / (double)(IMAGE_WIDTH * IMAGE_HEIGHT));
printf("Avg. time per iteration: %g microseconds\n",
(nTimeEnd - nTimeStart) / (double)nTotalIterationsCount);
printf("Iterations/second: %g\n",
nTotalIterationsCount / (double)(nTimeEnd - nTimeStart) * 1e6);
// assume there are 8 floating point operations per iteration
printf("MFlop/s: %g\n",
nTotalIterationsCount * 8.0 / (double)(nTimeEnd - nTimeStart));
png_write(pPng, "mandel.png");
return 0;

View file

@ -0,0 +1,66 @@
#include "pngwriter.h"
#include <stdlib.h>
png_data *png_create(int nWidth, int nHeight) {
int i;
png_data *pData = (png_data *)malloc(sizeof(png_data));
pData->nWidth = nWidth;
pData->nHeight = nHeight;
pData->pPixels = (png_bytepp)malloc(nHeight * sizeof(png_bytep));
for (i = 0; i < nHeight; i++)
pData->pPixels[i] = (png_bytep)malloc(3 * nWidth * sizeof(png_byte));
return pData;
/*
 * Clamp the integer lvalue x to the range [0, 255], in place.
 *
 * The expansion is one brace-enclosed compound statement, so both checks stay
 * together when the macro is the body of an enclosing if/else or loop. It can
 * be invoked with or without a trailing semicolon. The argument is
 * parenthesized but is evaluated more than once, so it must not have side
 * effects.
 */
#define CHECK_RGB_BOUNDS(x) \
  {                         \
    if ((x) > 255)          \
      (x) = 255;            \
    if ((x) < 0)            \
      (x) = 0;              \
  }
/*
 * Store the colour (r, g, b) for pixel (x, y) in pData.
 *
 * Each component is cast to char and written into row nHeight - y - 1 of
 * pPixels, at byte offsets 3*x - 3, 3*x - 2 and 3*x - 1 of that row.
 *
 * NOTE(review): the statements guarded by the two bounds checks below are not
 * visible in this view of the file - confirm what happens when x or y is out
 * of range, and whether negative x / y are rejected anywhere.
 * NOTE(review): for x == 0 the offsets are -3..-1, i.e. before the start of
 * the row buffer. The Mandelbrot driver in this project calls png_plot with a
 * column index that starts at 0, so either callers are expected to be 1-based
 * or the offsets should be 3*x .. 3*x + 2 - TODO confirm and fix.
 */
void png_plot(png_data *pData, int x, int y, int r, int g, int b) {
if (x >= pData->nWidth)
if (y >= pData->nHeight)
pData->pPixels[pData->nHeight - y - 1][3 * x - 3] = (char)r;
pData->pPixels[pData->nHeight - y - 1][3 * x - 2] = (char)g;
pData->pPixels[pData->nHeight - y - 1][3 * x - 1] = (char)b;
/*
 * Write the image held in pData to the file szFileName using libpng.
 *
 * Opens szFileName in "wb" mode, creates the libpng write and info structs,
 * attaches the stream, sets the compression level to
 * PNGWRITER_DEFAULT_COMPRESSION, emits the IHDR (using pData->nWidth and
 * pData->nHeight) and a gAMA value of 0.7, writes the rows in pData->pPixels
 * and finally destroys the libpng structs.
 *
 * NOTE(review): the statement guarded by `fp == NULL` and the remaining
 * arguments of png_set_IHDR are not visible in this view of the file.
 * NOTE(review): no fclose(fp) and no NULL checks on png_ptr / info_ptr are
 * visible here - confirm the stream is closed and the libpng handles are
 * validated.
 */
void png_write(png_data *pData, char *szFileName) {
FILE *fp;
png_structp png_ptr;
png_infop info_ptr;
fp = fopen(szFileName, "wb");
if (fp == NULL)
png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
info_ptr = png_create_info_struct(png_ptr);
png_init_io(png_ptr, fp);
png_set_compression_level(png_ptr, PNGWRITER_DEFAULT_COMPRESSION);
png_set_IHDR(png_ptr, info_ptr, pData->nWidth, pData->nHeight, 8,
png_set_gAMA(png_ptr, info_ptr, 0.7);
png_write_info(png_ptr, info_ptr);
png_write_image(png_ptr, pData->pPixels);
png_write_end(png_ptr, info_ptr);
png_destroy_write_struct(&png_ptr, &info_ptr);

View file

@ -0,0 +1,18 @@
#ifndef PNGWRITER_H_
#define PNGWRITER_H_
#include <png.h>
/* In-memory image that is filled pixel by pixel and then written with libpng. */
typedef struct {
/* Array of row pointers; png_create allocates nHeight rows of 3 * nWidth bytes. */
png_bytepp pPixels;
/* Image width in pixels. */
int nWidth;
/* Image height in pixels. */
int nHeight;
} png_data;
/* Allocate a png_data of the given size; pixel contents are not initialized. */
png_data *png_create(int nWidth, int nHeight);
/* Store the colour (r, g, b) for pixel (x, y); it is written to row nHeight - y - 1. */
void png_plot(png_data *pData, int x, int y, int r, int g, int b);
/* Write the image to the file szFileName (opened in "wb" mode) through libpng. */
void png_write(png_data *pData, char *szFileName);
#endif /*PNGWRITER_H_*/

Project2/project2.pdf Normal file

Binary file not shown.

Project2/project2_intro.pdf Normal file

Binary file not shown.