/*
 * CS3413/Lab3/main.c
 * (web-view export header: 203 lines, 8.1 KiB, C; revision 2023-10-04 18:07:09 -03:00)
 */
/**
* Question 1: Run the program with the problem size of 1000 and 10 threads, what is the approximate speedup you are achieving?
*
* Answer 1: On my home machine single threading took 0.640239 seconds and multithreading took 0.194644 seconds.
* On the lab machine single threading took 0.697472 seconds and multithreading took 0.114415 seconds.
* This means that multithreading is about 3.29 times faster on my home machine and about 6.10 times faster on the lab machine.
*
* Question 2: Is there a problem size / number of threads combination that slows down the computation process? Why do you think it is happening?
*
* Answer 2: There is a problem with having too many threads for the number of hardware threads available. Increasing past this only
* increases the overhead of creating and managing the threads. This is because the threads are not running in parallel and are instead
* being switched between by the OS. At lower matrix sizes the cost of creating threads and managing them is greater than the cost of
* just doing the computation in a single thread, so any combination where threads > the number of hardware threads will be slower, and
* the closer the matrix size gets to 1, the greater the effect thread creation and management will have on the speed of the program.
*
* Question 3: What is the minimum size of the problem that benefits from creating an extra thread?
*
* Answer 3: The lowest size on my home machine that consistently benefited from an extra thread was 150, but this can change depending
* on the specifications of the machine.
*
* Question 4: Does using the threads always improve execution duration?
*
* Answer 4: No, as the number of threads increases past the number of hardware threads available the execution duration increases
* due to managing the threads, as well as low size matrices where the cost of creating and managing threads is greater than the
* cost of just doing the computation in a single thread.
*
* Question 5: Guesstimate and comment on the nature of growth of the speedup with the number of threads is it linear, exponential, are there any limits?
*
* Answer 5: The speedup is linear up to the number of hardware threads available, given a large enough matrix size.
* Given a matrix of size x, each of the y threads computes approximately x/y rows, so the speedup is approximately y, up to the number of hardware threads available.
* After this the speedup will decrease as the number of threads increases, due to the overhead of creating and managing threads.
**/
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
/* Revision timestamp left over from the web view: 2023-10-04 13:41:07 -03:00 */
/* Exclusive upper bound for generated matrix entries (values are rand() % MAXN). */
#define MAXN 5
/**
 * Allocates a size x size matrix of ints with every element set to zero.
 *
 * The matrix is an array of `size` row pointers, each pointing at its own
 * separately allocated row of `size` ints. The caller owns the result and
 * must free every row and then the row-pointer array.
 *
 * @param size number of rows and columns
 * @return the new matrix, or NULL if size is negative or an allocation fails
 *         (nothing is leaked on failure)
 */
int **generate_square_matrix(int size) {
    if (size < 0) {
        /* A negative count would be converted to an enormous size_t. */
        return NULL;
    }

    /* calloc zero-fills and checks the count * element-size product for overflow. */
    int **array = calloc((size_t) size, sizeof *array);
    if (array == NULL) {
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        array[i] = calloc((size_t) size, sizeof *array[i]);
        if (array[i] == NULL) {
            /* Release the rows allocated so far before giving up. */
            while (i > 0) {
                free(array[--i]);
            }
            free(array);
            return NULL;
        }
    }
    return array;
}
/**
 * Allocates a size x size matrix and fills every element with rand() % MAXN.
 *
 * The values depend on the current state of the rand() generator, so the
 * caller decides the sequence through srand().
 *
 * @param size number of rows and columns
 * @return the filled matrix, or NULL if generate_square_matrix returned NULL
 */
int **generate_square_matrix_and_fill_it(int size) {
    int **array = generate_square_matrix(size);
    if (array == NULL) {
        /* Propagate the failure instead of dereferencing a null matrix. */
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            array[i][j] = rand() % MAXN;
        }
    }
    return array;
}
/**
 * Writes a size x size matrix to standard output, one row per line.
 * Every element is followed by a single space, and every row ends with a newline.
 *
 * @param array matrix to print, indexed as array[row][column]
 * @param size  number of rows and columns
 */
void print_square_matrix(int **array, int size) {
    int row = 0;
    while (row < size) {
        int col = 0;
        while (col < size) {
            printf("%i ", array[row][col]);
            ++col;
        }
        printf("\n");
        ++row;
    }
}
/**
 * Compares two size x size matrices element by element in row-major order.
 *
 * @param array  first matrix
 * @param array2 second matrix
 * @param size   number of rows and columns
 * @return 0 when every element matches; otherwise array[i][j] - array2[i][j]
 *         for the first position at which the matrices differ
 */
int check_if_matrices_differ(int **array, int **array2, int size) {
    for (int row = 0; row < size; ++row) {
        const int *lhs = array[row];
        const int *rhs = array2[row];
        for (int col = 0; col < size; ++col) {
            const int delta = lhs[col] - rhs[col];
            if (delta != 0) {
                return delta;
            }
        }
    }
    return 0;
}
/*
 * Argument bundle passed (as a void pointer) to multiply_matrices and
 * multiply_matrices_threaded.
 */
typedef struct _params {
/* Left operand of the product; read as first_array[row][i]. */
int **first_array;
/* Right operand of the product; read as second_array[i][column]. */
int **second_array;
/* Destination matrix; result[row][column] is overwritten for each computed row. */
int **result;
/* Row stride used by multiply_matrices_threaded; multiply_matrices does not read it. */
int max_threads;
/* First row the routine computes. */
int row_index;
/* Number of rows and columns of all three matrices. */
int size;
} ThreadParams;
/**
 * Sequential matrix product: for every row from row_index up to size - 1,
 * stores the row of first_array * second_array into result.
 *
 * @param threadParams pointer to a ThreadParams describing the operands,
 *                     the destination, the starting row and the dimension
 */
void multiply_matrices(void *threadParams) {
    const ThreadParams *args = threadParams;
    const int dim = args->size;

    for (int r = args->row_index; r < dim; ++r) {
        for (int c = 0; c < dim; ++c) {
            /* Dot product of row r of the first matrix and column c of the second. */
            int dot = 0;
            for (int k = 0; k < dim; ++k) {
                dot += args->first_array[r][k] * args->second_array[k][c];
            }
            args->result[r][c] = dot;
        }
    }
}
void *multiply_matrices_threaded(void *threadParams) {
/**
* write a code for matrix multiplication that will utilize the
* threading capacity and parallelize the computation in such a
* way that a thread computes result per one or more rows
*/
2023-10-04 18:07:09 -03:00
ThreadParams *t = (ThreadParams *) threadParams;
int N = t->size;
int row = t->row_index;
int column = 0;
int temp_result = 0;
while (row < N) {
column = 0;
while (column < N) {
temp_result = 0;
for (int i = 0; i < t->size; i++) {
temp_result = temp_result + t->first_array[row][i] * t->second_array[i][column];
}
t->result[row][column] = temp_result;
column = column + 1;
}
row += t->max_threads;
}
2023-10-04 18:12:23 -03:00
return NULL;
2023-10-04 13:41:07 -03:00
}
/* Parses text as a strictly positive decimal int; returns 0 on success, -1 otherwise. */
static int parse_positive_int(const char *text, int *out) {
    char *end = NULL;
    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value <= 0 || value > INT_MAX) {
        return -1;
    }
    *out = (int) value;
    return 0;
}

/* Frees a matrix made of `size` separately allocated rows; a NULL matrix is ignored. */
static void free_square_matrix(int **matrix, int size) {
    if (matrix == NULL) {
        return;
    }
    for (int i = 0; i < size; i++) {
        free(matrix[i]);
    }
    free(matrix);
}

/* Returns the time from begin to end in seconds, computed in 64-bit microseconds. */
static double elapsed_seconds(const struct timeval *begin, const struct timeval *end) {
    long long microseconds = ((long long) end->tv_sec * 1000000LL + end->tv_usec)
                             - ((long long) begin->tv_sec * 1000000LL + begin->tv_usec);
    return (1.0 * microseconds) / 1000000;
}

/**
 * Multiplies two random square matrices once sequentially and once with
 * worker threads, prints both durations and verifies that the results agree.
 *
 * Usage: program <matrix size> <number of threads>
 *
 * All buffers are allocated before the timers start, so the reported times
 * cover the multiplication itself plus, for the threaded run, thread creation
 * and joining.
 *
 * @return EXIT_SUCCESS when both results match; EXIT_FAILURE on bad
 *         arguments, allocation failure, thread creation failure or mismatch
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        printf("Please provide size of the matrix and the number of threads to execute\n");
        return EXIT_FAILURE;
    }

    int size = 0;
    int max_threads = 0;
    if (parse_positive_int(argv[1], &size) != 0 || parse_positive_int(argv[2], &max_threads) != 0) {
        fprintf(stderr, "Matrix size and number of threads must be positive integers\n");
        return EXIT_FAILURE;
    }

    // The value you pass to srand determines the random sequence
    srand((unsigned int) time(NULL));

    int status = EXIT_FAILURE;
    int started = 0;       // number of worker threads successfully created
    int create_error = 0;  // error code of the first failed pthread_create, if any
    struct timeval begin;
    struct timeval end;

    int **array1 = generate_square_matrix_and_fill_it(size);
    int **array2 = generate_square_matrix_and_fill_it(size);
    int **result = generate_square_matrix(size);          // sequential product
    int **threaded_result = generate_square_matrix(size); // threaded product
    ThreadParams *thr = malloc(sizeof *thr);
    // Heap arrays instead of a variable-length array sized by user input.
    pthread_t *threads = calloc((size_t) max_threads, sizeof *threads);
    ThreadParams *params = calloc((size_t) max_threads, sizeof *params);

    if (array1 == NULL || array2 == NULL || result == NULL || threaded_result == NULL
        || thr == NULL || threads == NULL || params == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }

    thr->first_array = array1;   // first matrix to multiply
    thr->second_array = array2;  // the second matrix to multiply
    thr->result = result;        // where to store the results
    thr->row_index = 0;          // the sequential run starts at the first row
    thr->size = size;
    thr->max_threads = max_threads;

    gettimeofday(&begin, NULL);
    multiply_matrices(thr);
    gettimeofday(&end, NULL);
    printf("Single threaded took %lf seconds to execute \n", elapsed_seconds(&begin, &end));

    gettimeofday(&begin, NULL);
    while (started < max_threads) {
        // Each worker gets its own parameter copy: same operands, own starting row.
        params[started] = *thr;
        params[started].result = threaded_result;
        params[started].row_index = started;
        create_error = pthread_create(&threads[started], NULL, multiply_matrices_threaded,
                                      &params[started]);
        if (create_error != 0) {
            break;
        }
        started++;
    }
    // Join only the threads that were actually created.
    for (int i = 0; i < started; i++) {
        pthread_join(threads[i], NULL);
    }
    gettimeofday(&end, NULL);

    if (create_error != 0) {
        fprintf(stderr, "Could not create thread %d: %s\n", started, strerror(create_error));
        goto cleanup;
    }
    printf("Multi-threaded took %lf seconds to execute \n", elapsed_seconds(&begin, &end));

    if (check_if_matrices_differ(result, threaded_result, size) != 0) {
        printf("Threaded result differ from single core computation, error\n");
        goto cleanup;
    }
    status = EXIT_SUCCESS;

cleanup:
    free(params);
    free(threads);
    free(thr);
    free_square_matrix(threaded_result, size);
    free_square_matrix(result, size);
    free_square_matrix(array2, size);
    free_square_matrix(array1, size);
    return status;
}