This repository has been archived on 2022-10-18. You can view files and clone it, but cannot push or open issues or pull requests.
HPC/Project1/project_1_maggioni_claudio/matmult/dgemm-blocked.c

58 lines
1.8 KiB
C

#include <string.h>
/*
Please include compiler name below (you may also include any other modules you would like to be loaded)
COMPILER= gnu
Please include All compiler flags and libraries as you want them run. You can simply copy this over from the Makefile's first few lines
CC = cc
OPT = -O3
CFLAGS = -Wall -std=gnu99 $(OPT)
MKLROOT = /opt/intel/composer_xe_2013.1.117/mkl
LDLIBS = -lrt -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread -lm
*/
/* Short text label for this implementation. It is not read anywhere in this
 * file; presumably an external benchmark driver prints it -- confirm there. */
const char* dgemm_desc = "Block-based dgemm.";
/* Step, in matrix elements, used by square_dgemm along each of the three loop
 * dimensions, i.e. the edge length of the tiles handed to naivemm. */
const int block_size = 50;
/* Return the smaller of the two integers a and b.
 *
 * Declared `static inline` on purpose: under C99/C11 rules a plain `inline`
 * definition does not emit an external definition, so any call the compiler
 * decides not to inline (e.g. when building without optimization) would be
 * left as an undefined reference at link time. */
static inline int min(int a, int b) {
  return a < b ? a : b;
}
/* Multiply-accumulate one rectangular tile:
 *
 *   C(i, j) += A(i, k) * B(k, j)
 *
 * for r_min <= i < r_max, c_min <= j < c_max and k_min <= k < k_max (all
 * upper bounds exclusive). Element (row, col) of each matrix is read at
 * offset row + col * n, i.e. column-major storage with column stride n.
 *
 * A and B are only read. C is expected not to overlap A or B; all indices
 * and n are expected to be non-negative.
 *
 * Implementation notes:
 *  - `static inline` instead of plain `inline`, so that a non-inlined call
 *    still has a definition to link against (C99/C11 inline semantics).
 *  - Column offsets are computed in size_t so that col * n cannot overflow
 *    int for large n.
 *  - Loops run j -> k -> i so the innermost loop walks A and C with unit
 *    stride. For every C(i, j) the k-terms are still added in increasing
 *    k order, so the rounding sequence per element is unchanged. */
static inline void naivemm(int r_min, int r_max, int k_min, int k_max, int c_min, int c_max, int n, const double* A, const double* B, double* C) {
  /* For each column j of the tile of C (and of B) */
  for (int j = c_min; j < c_max; ++j) {
    double* c_col = C + (size_t)j * (size_t)n;
    const double* b_col = B + (size_t)j * (size_t)n;
    /* For each term k of the inner product */
    for (int k = k_min; k < k_max; ++k) {
      const double* a_col = A + (size_t)k * (size_t)n;
      /* B(k, j) is constant across the row loop below. */
      const double b_kj = b_col[k];
      /* For each row i of the tile: contiguous in both A and C */
      for (int i = r_min; i < r_max; ++i) {
        c_col[i] += a_col[i] * b_kj;
      }
    }
  }
}
/* Blocked dgemm driver:
 *   C := C + A * B
 * A, B and C are n-by-n matrices stored in column-major format.
 * The index space is cut into tiles of at most block_size elements per
 * dimension, and every (row tile, column tile, inner tile) triple is handed
 * to naivemm. On exit, A and B maintain their input values. */
void square_dgemm(int n, double* A, double* B, double* C) {
  /* Row tiles of A and C */
  for (int row_lo = 0; row_lo < n; row_lo += block_size) {
    /* Clamp the upper bound so the last tile stops at n. */
    const int row_hi = min(row_lo + block_size, n);

    /* Column tiles of B and C */
    for (int col_lo = 0; col_lo < n; col_lo += block_size) {
      const int col_hi = min(col_lo + block_size, n);

      /* Tiles along the shared inner dimension (columns of A, rows of B) */
      for (int inner_lo = 0; inner_lo < n; inner_lo += block_size) {
        const int inner_hi = min(inner_lo + block_size, n);

        naivemm(row_lo, row_hi,
                inner_lo, inner_hi,
                col_lo, col_hi,
                n, A, B, C);
      }
    }
  }
}