/**
 * Question 1: Run the program with the problem size of 1000 and 10 threads, what is the approximate speedup you are achieving?
 *
 * Answer 1: On my home machine single threading took 0.640239 seconds and multithreading took 0.194644 seconds.
 * On the lab machine single threading took 0.697472 seconds and multithreading took 0.114415 seconds.
 * Computing speedup as T_single / T_multi, my home machine achieved a speedup of roughly 3.29x and the lab machine roughly 6.10x.
 *
 * Question 2: Is there a problem size / number of threads combination that slows down the computation process? Why do you think it is happening?
 *
 * Answer 2: Yes. Once the number of threads exceeds the number of hardware threads available, adding more threads only increases
 * the overhead of creating and managing them: the extra threads cannot actually run in parallel and are instead context-switched
 * by the OS. For small matrices the cost of creating and managing threads is greater than the cost of doing the computation in a
 * single thread, so any combination where the thread count exceeds the hardware thread count is slower, and the smaller the
 * matrix, the larger the relative impact of thread creation and management on the program's runtime.
 *
 * Question 3: What is the minimum size of the problem that benefits from creating an extra thread?
 *
 * Answer 3: The smallest size on my home machine that consistently benefited from an extra thread was 150, but this can vary with
 * the specifications of the machine.
 *
 * Question 4: Does using the threads always improve execution duration?
 *
 * Answer 4: No. Once the number of threads exceeds the number of hardware threads available, execution time increases because of
 * the cost of managing the extra threads, and for small matrices the cost of creating and managing threads is greater than the
 * cost of doing the computation in a single thread.
 *
 * Question 5: Guesstimate and comment on the nature of growth of the speedup with the number of threads – is it linear, exponential, are there any limits?
 *
 * Answer 5: The speedup grows roughly linearly up to the number of hardware threads available, given a large enough matrix size.
 * With y threads, each thread computes roughly 1/y of the rows, so the speedup is approximately y, up to the number of hardware
 * threads available. Beyond that point the speedup decreases as the number of threads increases, due to the overhead of creating
 * and managing threads.
 **/

#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

#define MAXN 5
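
/*
 * Hedged sketch (not part of the original skeleton): Answers 2 and 4 above argue
 * that creating more threads than there are hardware threads only adds scheduling
 * overhead. One way to act on that is to cap the requested thread count at the
 * number of online processors reported by sysconf(_SC_NPROCESSORS_ONLN), which is
 * available on Linux and macOS. The helper name cap_thread_count is an assumption;
 * the skeleton itself never calls it.
 */
#include <unistd.h>

static int cap_thread_count(int requested_threads) {
    long online = sysconf(_SC_NPROCESSORS_ONLN); // CPUs currently online, -1 on failure
    if (online < 1) {
        return requested_threads; // query failed; fall back to the requested count
    }
    return requested_threads < online ? requested_threads : (int) online;
}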

int **generate_square_matrix(int size) {
    int **array = malloc(sizeof(int *) * size);
    for (int i = 0; i < size; i++) {
        array[i] = malloc(sizeof(int) * size);
        memset(array[i], 0, sizeof(int) * size);
    }
    return array;
}
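
/*
 * Hedged addition (not part of the original skeleton): generate_square_matrix()
 * allocates one block per row plus the row-pointer block, and main() below never
 * frees them. A matching release helper, if one were wanted, could look like this.
 */
void free_square_matrix(int **array, int size) {
    for (int i = 0; i < size; i++) {
        free(array[i]); // release each row
    }
    free(array); // release the array of row pointers
}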

int **generate_square_matrix_and_fill_it(int size) {
    int **array = generate_square_matrix(size);
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            array[i][j] = rand() % MAXN;
        }
    }
    return array;
}

void print_square_matrix(int **array, int size) {
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            printf("%i ", array[i][j]);
        }
        printf("\n");
    }
}

int check_if_matrices_differ(int **array, int **array2, int size) {
    int result = 0;
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            result += array[i][j] - array2[i][j];
            if (result != 0) {
                return result; // first differing element found, matrices differ
            }
        }
    }
    return result;
}

typedef struct _params {
    int **first_array;
    int **second_array;
    int **result;
    int max_threads;
    int row_index;
    int size;
} ThreadParams;

void multiply_matrices(void *threadParams) {
    ThreadParams *t = (ThreadParams *) threadParams;
    int N = t->size;
    int row = t->row_index;
    int column = 0;
    int temp_result = 0;
    while (row < N) {
        column = 0;
        while (column < N) {
            temp_result = 0;
            for (int i = 0; i < N; i++) {
                temp_result = temp_result + t->first_array[row][i] * t->second_array[i][column];
            }
            t->result[row][column] = temp_result;
            column = column + 1;
        }
        row = row + 1;
    }
}

void *multiply_matrices_threaded(void *threadParams) {
    /**
     * Write code for matrix multiplication that will utilize the
     * threading capacity and parallelize the computation in such a
     * way that a thread computes the result for one or more rows
     */
    ThreadParams *t = (ThreadParams *) threadParams;
    int N = t->size;
    int row = t->row_index;
    int column = 0;
    int temp_result = 0;
    while (row < N) {
        column = 0;
        while (column < N) {
            temp_result = 0;
            for (int i = 0; i < t->size; i++) {
                temp_result = temp_result + t->first_array[row][i] * t->second_array[i][column];
            }
            t->result[row][column] = temp_result;
            column = column + 1;
        }
        // Each thread starts at its own row_index and strides by max_threads,
        // so the threads interleave over the rows without overlapping.
        row += t->max_threads;
    }
    return NULL;
}

int main(int argc, char **argv) {
    if (argc != 3) {
        printf("Please provide the size of the matrix and the number of threads to execute\n");
        exit(1); // usage error, exit with a nonzero status
    }

    int size = atoi(argv[1]);
    int max_threads = atoi(argv[2]);

    // The value you pass to srand determines the random sequence
    srand(time(NULL)); // Line to initialize the random number generator.

    int **array1 = generate_square_matrix_and_fill_it(size);
    int **array2 = generate_square_matrix_and_fill_it(size);
    int **result = generate_square_matrix(size); // generate an empty matrix

    struct timeval begin;
    struct timeval end;
    gettimeofday(&begin, NULL); // fills the contents with time since the beginning of epoch

    ThreadParams *thr = (ThreadParams *) malloc(sizeof(ThreadParams)); // allocate a structure for holding function parameters
    thr->first_array = array1; // first matrix to multiply
    thr->second_array = array2; // the second matrix to multiply
    thr->result = result; // where to store the results - note it needs to be generated
    thr->row_index = 0; // this variable, in combination with max_threads can be used for parallelization
    thr->size = size;
    thr->max_threads = max_threads;

    multiply_matrices((void *) thr);

    gettimeofday(&end, NULL); // fills the contents with time since the beginning of epoch
    //The next line is inspired by https://linuxhint.com/gettimeofday_c_language/
    long long microseconds = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
    double duration = (1.0 * microseconds) / 1000000;
    printf("Single threaded took %lf seconds to execute \n", duration);

    int **threaded_result = generate_square_matrix(size);
    gettimeofday(&begin, NULL);

    /**
     * Write your code to create and use max_threads here, such that the threaded_result
     * is populated with the result of the computation.
     */
    thr->result = threaded_result;

    pthread_t threads[max_threads];
    for (int i = 0; i < max_threads; i++) {
        ThreadParams *params = (ThreadParams *) malloc(sizeof(ThreadParams));
        params->first_array = array1;
        params->second_array = array2;
        params->result = threaded_result;
        params->row_index = i; // thread i starts at row i and strides by max_threads
        params->size = size;
        params->max_threads = max_threads;
        pthread_create(&threads[i], NULL, &multiply_matrices_threaded, (void *) params);
    }
    for (int i = 0; i < max_threads; i++) {
        pthread_join(threads[i], NULL);
    }

    gettimeofday(&end, NULL);
    //The next line is inspired by https://linuxhint.com/gettimeofday_c_language/
    microseconds = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec);
    duration = (1.0 * microseconds) / 1000000;
    printf("Multi-threaded took %lf seconds to execute \n", duration);

    if (check_if_matrices_differ(result, threaded_result, size) != 0) {
        printf("Threaded result differs from single core computation, error\n");
        exit(1);
    }

    return 0;
}
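
/*
 * Hedged usage sketch (not part of the original file): assuming this translation
 * unit is saved as matrix.c (the file name is an assumption), it can be built and
 * run against the Question 1 configuration (size 1000, 10 threads) roughly like:
 *
 *   gcc -O2 -pthread matrix.c -o matrix
 *   ./matrix 1000 10
 *
 * The program prints the single-threaded and multi-threaded durations, from which
 * the speedup is T_single / T_multi.
 */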