CS3413/Lab3/main.c

203 lines
8.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Question 1: Run the program with the problem size of 1000 and 10 threads, what is the approximate speedup you are achieving?
*
* Answer 1: On my home machine single threading took 0.640239 seconds and multithreading took 0.194644 seconds.
* On the lab machine single threading took 0.697472 seconds and multithreading took 0.114415 seconds.
* This means that multithreading is about 3.29 times faster on my home machine and about 6.10 times faster on the lab machine.
*
* Question 2: Is there a problem size / number of threads combination that slows down the computation process? Why do you think it is happening?
*
* Answer 2: There is a problem with having too many threads for the number of hardware threads available. Increasing past this only
* increases the overhead of creating and managing the threads. This is because the threads are not running in parallel and are instead
* being switched between by the OS. At lower matrix sizes the cost of creating threads and managing them is greater than the cost of
* just doing the computation in a single thread, so any combination where threads > the number of hardware threads will be slower, and
* the closer the matrix size gets to 1, the greater the effect thread creation and management will have on the speed of the program.
*
* Question 3: What is the minimum size of the problem that benefits from creating an extra thread?
*
* Answer 3: The lowest size on my home machine that consistently benefited from an extra thread was 150, but this can change depending
* on the specifications of the machine.
*
* Question 4: Does using the threads always improve execution duration?
*
* Answer 4: No. Once the number of threads exceeds the number of hardware threads available, the execution duration increases
* due to the overhead of managing the threads. The same is true for small matrices, where the cost of creating and managing
* threads is greater than the cost of just doing the computation in a single thread.
*
* Question 5: Guesstimate and comment on the nature of growth of the speedup with the number of threads: is it linear or exponential, and are there any limits?
*
* Answer 5: The speedup is linear up to the number of hardware threads available, given a large enough matrix size.
* Given a matrix of size x, each of y threads handles approximately x/y rows, so the speedup is approximately y, up to the number of hardware threads available.
* After this the speedup will decrease as the number of threads increases, due to the overhead of creating and managing threads.
**/
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <pthread.h>
#include <sys/time.h>
#define MAXN 5
/**
 * Allocate a size x size matrix of ints with every element set to zero.
 *
 * The matrix is an array of `size` row pointers, each pointing to its own
 * separately allocated row of `size` ints.
 *
 * @param size number of rows and columns; must be positive.
 * @return the new matrix, or NULL if size is not positive or an allocation
 *         fails (nothing is leaked in that case). The caller owns the result
 *         and must free every row and then the row-pointer array.
 */
int **generate_square_matrix(int size) {
    if (size <= 0) {
        return NULL;
    }

    int **array = malloc(sizeof *array * (size_t) size);
    if (array == NULL) {
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        /* calloc hands back a zero-filled row and checks the size product. */
        array[i] = calloc((size_t) size, sizeof **array);
        if (array[i] == NULL) {
            /* Roll back the rows allocated so far so nothing leaks. */
            while (i-- > 0) {
                free(array[i]);
            }
            free(array);
            return NULL;
        }
    }
    return array;
}
/**
 * Allocate a size x size matrix and fill it with pseudo-random values.
 *
 * Elements are assigned in row-major order, each set to rand() % MAXN, so the
 * contents depend on the current state of the rand() generator.
 *
 * @param size number of rows and columns.
 * @return the filled matrix, or NULL when generate_square_matrix() returned
 *         NULL. The caller owns the result.
 */
int **generate_square_matrix_and_fill_it(int size) {
    int **array = generate_square_matrix(size);
    if (array == NULL) {
        /* No matrix was produced, so there is nothing to fill. */
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            array[i][j] = rand() % MAXN;
        }
    }
    return array;
}
/**
 * Print a size x size matrix to standard output, one row per line.
 *
 * Every element is written with the "%i " format (so each value, including
 * the last one in a row, is followed by a space) and every row is terminated
 * by a newline.
 *
 * @param array matrix to print, indexed as array[row][column].
 * @param size  number of rows and columns.
 */
void print_square_matrix(int **array, int size) {
    int row = 0;
    while (row < size) {
        int col = 0;
        while (col < size) {
            printf("%i ", array[row][col]);
            col++;
        }
        printf("\n");
        row++;
    }
}
/**
 * Compare two size x size matrices element by element in row-major order.
 *
 * @param array  first matrix.
 * @param array2 second matrix.
 * @param size   number of rows and columns in both matrices.
 * @return 0 when every pair of elements is equal; otherwise the difference
 *         array[i][j] - array2[i][j] at the first position where they differ.
 */
int check_if_matrices_differ(int **array, int **array2, int size) {
    for (int row = 0; row < size; row++) {
        const int *lhs = array[row];
        const int *rhs = array2[row];
        for (int col = 0; col < size; col++) {
            const int delta = lhs[col] - rhs[col];
            if (delta != 0) {
                /* First mismatch decides the result. */
                return delta;
            }
        }
    }
    return 0;
}
/**
 * Argument bundle passed (as a void pointer) to multiply_matrices() and
 * multiply_matrices_threaded().
 */
typedef struct _params {
/* Left-hand operand; read as first_array[row][i]. */
int **first_array;
/* Right-hand operand; read as second_array[i][column]. */
int **second_array;
/* Output matrix; written as result[row][column]. */
int **result;
/* Row stride used by multiply_matrices_threaded(); multiply_matrices() does not read it. */
int max_threads;
/* First row this call computes. */
int row_index;
/* Number of rows and columns of the matrices. */
int size;
} ThreadParams;
/**
 * Single-threaded matrix product: result = first_array * second_array.
 *
 * Computes every row from row_index up to size - 1, and every column of each
 * of those rows. The max_threads field is not used here.
 *
 * @param threadParams pointer to a ThreadParams describing the operands, the
 *                     output matrix, the starting row and the matrix size.
 */
void multiply_matrices(void *threadParams) {
    const ThreadParams *job = threadParams;
    const int dim = job->size;

    for (int r = job->row_index; r < dim; r++) {
        const int *lhs_row = job->first_array[r];
        int *out_row = job->result[r];

        for (int c = 0; c < dim; c++) {
            /* Dot product of row r of the first matrix and column c of the second. */
            int dot = 0;
            for (int k = 0; k < dim; k++) {
                dot += lhs_row[k] * job->second_array[k][c];
            }
            out_row[c] = dot;
        }
    }
}
/**
 * Thread entry point computing a strided subset of the rows of
 * result = first_array * second_array.
 *
 * Starting at row_index, the function computes that row and then every
 * max_threads-th row after it (row_index, row_index + max_threads, ...) while
 * the row is below size. When each worker is given a distinct row_index in
 * [0, max_threads), the workers write disjoint rows of the result.
 *
 * @param threadParams pointer to this worker's ThreadParams.
 * @return always NULL.
 */
void *multiply_matrices_threaded(void *threadParams) {
    const ThreadParams *job = threadParams;
    const int dim = job->size;
    const int stride = job->max_threads;

    for (int r = job->row_index; r < dim; r += stride) {
        const int *lhs_row = job->first_array[r];
        int *out_row = job->result[r];

        for (int c = 0; c < dim; c++) {
            /* Dot product of row r of the first matrix and column c of the second. */
            int dot = 0;
            for (int k = 0; k < dim; k++) {
                dot += lhs_row[k] * job->second_array[k][c];
            }
            out_row[c] = dot;
        }
    }
    return NULL;
}
int main(int argc, char **argv) {
if (argc != 3) {
printf("Please provide size of the matrix and the number of threads to execute\n");
exit(0);
}
int size = atoi(argv[1]);
int max_threads = atoi(argv[2]);
// The value you pass to srand determines the random sequence
srand(time(NULL)); // Line to initialize the random number generator.
int **array1 = generate_square_matrix_and_fill_it(size);
int **array2 = generate_square_matrix_and_fill_it(size);
int **result = generate_square_matrix(size); // generate an empty matrix
struct timeval begin;
struct timeval end;
gettimeofday(&begin, NULL); // fills the contents with time since the beginning of epoch
ThreadParams *thr = (ThreadParams *) malloc(
sizeof(ThreadParams)); // allocate a structure for holding function parameters
thr->first_array = array1; // first matrix to multiply
thr->second_array = array2; // the second matrix to multiply
thr->result = result; // where to store the results - note it needs to be generated
thr->row_index = 0; // this variable, in combination with max_threads can be used for parallelization
thr->size = size;
thr->max_threads = max_threads;
multiply_matrices((void *) thr);
gettimeofday(&end, NULL); // fills the contents with time since the beginning of epoch
//The next line is inspired by https://linuxhint.com/gettimeofday_c_language/
long long microseconds = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
double duration = (1.0 * microseconds) / 1000000;
printf("Single threaded took %lf seconds to execute \n", duration);
int **threaded_result = generate_square_matrix(size);
gettimeofday(&begin, NULL);
/**
* Write your code to create and use max_threads here, such that the threaded_result
* is populated with the result of the computation.
*/
thr->result = threaded_result;
pthread_t threads[max_threads];
for (int i = 0; i < max_threads; i++) {
ThreadParams *params = (ThreadParams *) malloc(sizeof(ThreadParams));
params->first_array = array1;
params->second_array = array2;
params->result = threaded_result;
params->row_index = i;
params->size = size;
params->max_threads = max_threads;
pthread_create(&threads[i], NULL, &multiply_matrices_threaded, (void *) params);
}
for (int i = 0; i < max_threads; i++) {
pthread_join(threads[i], NULL);
}
gettimeofday(&end, NULL);
//The next line is inspired by https://linuxhint.com/gettimeofday_c_language/
microseconds = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
duration = (1.0 * microseconds) / 1000000;
printf("Multi-threaded took %lf seconds to execute \n", duration);
if (check_if_matrices_differ(result, threaded_result, size) != 0) {
printf("Threaded result differ from single core computation, error\n");
exit(1);
}
return 0;
}