/**
 * Question 1: Run the program with the problem size of 1000 and 10 threads, what is the approximate speedup you are achieving?
 *
 * Answer 1: On my home machine single threading took 0.640239 seconds and multithreading took 0.194644 seconds.
 * On the lab machine single threading took 0.697472 seconds and multithreading took 0.114415 seconds.
 * This means that my home machine is 3.29 times faster and the lab machine is 6.10 times faster.
 *
 * Question 2: Is there a problem size / number of threads combination that slows down the computation process? Why do you think it is happening?
 *
 * Answer 2: There is a problem with having too many threads for the number of hardware threads available. Increasing past this only
 * increases the overhead of creating and managing the threads. This is because the threads are not running in parallel and are instead
 * being switched between by the OS. At lower matrix sizes the cost of creating threads and managing them is greater than the cost of
 * just doing the computation in a single thread, so any combination where threads > the number of hardware threads will be slower, and
 * as the matrix size approaches 1, the greater the effect thread creation and management will have on the speed of the program.
 *
 * Question 3: What is the minimum size of the problem that benefits from creating an extra thread?
 *
 * Answer 3: The lowest size on my home machine that consistently benefited from an extra thread was 150, but this can change depending
 * on the specifications of the machine.
 *
 * Question 4: Does using the threads always improve execution duration?
 *
 * Answer 4: No, as the number of threads increases past the number of hardware threads available the execution duration increases
 * due to managing the threads, as well as low size matrices where the cost of creating and managing threads is greater than the
 * cost of just doing the computation in a single thread.
 *
 * Question 5: Guesstimate and comment on the nature of growth of the speedup with the number of threads - is it linear, exponential, are there any limits?
 *
 * Answer 5: The speedup is linear up to the number of hardware threads available, given a large enough matrix size.
 * Given a matrix of size x, the speedup of using y threads is approximately x/y, up to the number of hardware threads available.
 * After this the speedup will decrease as the number of threads increases, due to the overhead of creating and managing threads.
 **/

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

/* Matrix entries are drawn from the range [0, MAXN). */
#define MAXN 5

/**
 * Release a matrix allocated by generate_square_matrix().
 *
 * @param array matrix to free; NULL is accepted and ignored
 * @param size  number of row pointers in the matrix that were allocated
 */
static void free_square_matrix(int **array, int size)
{
    if (array == NULL) {
        return;
    }
    for (int i = 0; i < size; i++) {
        free(array[i]);
    }
    free(array);
}

/**
 * Allocate a size x size matrix with every element set to zero.
 *
 * @param size number of rows and columns; must be positive
 * @return the new matrix, or NULL when size is not positive or an
 *         allocation fails (nothing is leaked in that case)
 */
int **generate_square_matrix(int size)
{
    if (size <= 0) {
        return NULL;
    }

    int **array = calloc((size_t) size, sizeof *array);
    if (array == NULL) {
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        array[i] = calloc((size_t) size, sizeof **array);
        if (array[i] == NULL) {
            /* Only the first i rows exist; release exactly those. */
            free_square_matrix(array, i);
            return NULL;
        }
    }
    return array;
}

/**
 * Allocate a size x size matrix and fill it with rand() % MAXN values.
 *
 * @param size number of rows and columns; must be positive
 * @return the new matrix, or NULL when the allocation fails
 */
int **generate_square_matrix_and_fill_it(int size)
{
    int **array = generate_square_matrix(size);
    if (array == NULL) {
        return NULL;
    }

    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            array[i][j] = rand() % MAXN;
        }
    }
    return array;
}

/**
 * Print a matrix to stdout, one row per line, elements separated by spaces.
 *
 * @param array matrix to print
 * @param size  number of rows and columns
 */
void print_square_matrix(int **array, int size)
{
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            printf("%i ", array[i][j]);
        }
        printf("\n");
    }
}

/**
 * Compare two matrices element by element in row-major order.
 *
 * @param array  first matrix
 * @param array2 second matrix
 * @param size   number of rows and columns of both matrices
 * @return 0 when the matrices are equal, otherwise the first non-zero
 *         difference array[i][j] - array2[i][j] that is encountered
 */
int check_if_matrices_differ(int **array, int **array2, int size)
{
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            int difference = array[i][j] - array2[i][j];
            if (difference != 0) {
                return difference;
            }
        }
    }
    return 0;
}

/**
 * Arguments handed to the multiplication routines.
 *
 * row_index is the first row a worker computes; max_threads is the number
 * of workers and is used as the row stride by the threaded routine.
 */
typedef struct thread_params {
    int **first_array;
    int **second_array;
    int **result;
    int max_threads;
    int row_index;
    int size;
} ThreadParams;

/**
 * Compute result = first_array * second_array for rows row_index,
 * row_index + stride, row_index + 2 * stride, ... below size.
 *
 * Does nothing when stride is not positive or row_index is negative, since
 * neither can describe a valid set of rows.
 */
static void multiply_rows(const ThreadParams *t, int stride)
{
    const int n = t->size;

    if (stride < 1 || t->row_index < 0) {
        return;
    }

    int row = t->row_index;
    while (row < n) {
        for (int column = 0; column < n; column++) {
            int sum = 0;
            for (int i = 0; i < n; i++) {
                sum += t->first_array[row][i] * t->second_array[i][column];
            }
            t->result[row][column] = sum;
        }

        /* Stop before stepping past the last row; this also keeps
         * row + stride from overflowing int for very large strides. */
        if (stride >= n - row) {
            break;
        }
        row += stride;
    }
}

/**
 * Single-threaded multiplication: computes every row from row_index up to
 * size - 1.
 *
 * @param threadParams pointer to a ThreadParams structure
 */
void multiply_matrices(void *threadParams)
{
    multiply_rows((const ThreadParams *) threadParams, 1);
}

/**
 * Thread entry point: computes rows row_index, row_index + max_threads, ...
 * so that max_threads workers started with row_index 0..max_threads-1 cover
 * every row exactly once without writing to the same result row.
 *
 * @param threadParams pointer to a ThreadParams structure that stays valid
 *                     until the thread has been joined
 * @return always NULL
 */
void *multiply_matrices_threaded(void *threadParams)
{
    const ThreadParams *t = (const ThreadParams *) threadParams;
    multiply_rows(t, t->max_threads);
    return NULL;
}

/**
 * Parse a strictly positive decimal int.
 *
 * @param text string to parse; the whole string must be a number
 * @param out  receives the parsed value on success
 * @return 0 on success, -1 when text is not a number in [1, INT_MAX]
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < 1 || value > INT_MAX) {
        return -1;
    }

    *out = (int) value;
    return 0;
}

/**
 * Seconds elapsed between two gettimeofday() samples.
 *
 * Inspired by https://linuxhint.com/gettimeofday_c_language/ ; the seconds
 * are subtracted before scaling to microseconds so the multiplication
 * cannot overflow a narrow time_t.
 */
static double elapsed_seconds(const struct timeval *begin, const struct timeval *end)
{
    long long microseconds = (long long) (end->tv_sec - begin->tv_sec) * 1000000LL
                             + (long long) (end->tv_usec - begin->tv_usec);
    return (double) microseconds / 1000000.0;
}

/**
 * Multiply two matrices using max_threads worker threads and wait for all
 * of them to finish.
 *
 * @return 0 on success, -1 when memory or a thread could not be obtained
 *         (threads that were already started are still joined)
 */
static int multiply_matrices_in_threads(int **first, int **second, int **result,
                                        int size, int max_threads)
{
    pthread_t *threads = calloc((size_t) max_threads, sizeof *threads);
    ThreadParams *params = calloc((size_t) max_threads, sizeof *params);
    int started = 0;
    int status = 0;

    if (threads == NULL || params == NULL) {
        status = -1;
    } else {
        for (; started < max_threads; started++) {
            params[started] = (ThreadParams) {
                .first_array = first,
                .second_array = second,
                .result = result,
                .max_threads = max_threads,
                .row_index = started,
                .size = size,
            };
            if (pthread_create(&threads[started], NULL, multiply_matrices_threaded,
                               &params[started]) != 0) {
                status = -1;
                break;
            }
        }
    }

    /* Join only the threads that were actually created. */
    for (int i = 0; i < started; i++) {
        if (pthread_join(threads[i], NULL) != 0) {
            status = -1;
        }
    }

    free(params);
    free(threads);
    return status;
}

/**
 * Usage: <program> <matrix size> <number of threads>
 *
 * Multiplies two random square matrices once on a single thread and once
 * with the requested number of threads, prints both durations and verifies
 * that the two results agree.
 *
 * @return EXIT_SUCCESS when both results match, EXIT_FAILURE on bad
 *         arguments, resource exhaustion or a result mismatch
 */
int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "Please provide size of the matrix and the number of threads to execute\n");
        return EXIT_FAILURE;
    }

    int size = 0;
    int max_threads = 0;
    if (parse_positive_int(argv[1], &size) != 0 || parse_positive_int(argv[2], &max_threads) != 0) {
        fprintf(stderr, "Size of the matrix and the number of threads must be positive integers\n");
        return EXIT_FAILURE;
    }

    /* The value passed to srand determines the random sequence. */
    srand((unsigned int) time(NULL));

    int exit_code = EXIT_FAILURE;
    int **array1 = generate_square_matrix_and_fill_it(size);
    int **array2 = generate_square_matrix_and_fill_it(size);
    int **result = generate_square_matrix(size);
    int **threaded_result = generate_square_matrix(size);
    if (array1 == NULL || array2 == NULL || result == NULL || threaded_result == NULL) {
        fprintf(stderr, "Failed to allocate the matrices\n");
        goto cleanup;
    }

    struct timeval begin;
    struct timeval end;

    /* Reference run: one thread computes every row. */
    gettimeofday(&begin, NULL);
    ThreadParams single = {
        .first_array = array1,
        .second_array = array2,
        .result = result,
        .max_threads = max_threads,
        .row_index = 0,
        .size = size,
    };
    multiply_matrices(&single);
    gettimeofday(&end, NULL);
    printf("Single threaded took %lf seconds to execute \n", elapsed_seconds(&begin, &end));

    /* Threaded run: thread creation and joining are deliberately part of
     * the measured interval, since that overhead is what is being studied. */
    gettimeofday(&begin, NULL);
    int thread_status = multiply_matrices_in_threads(array1, array2, threaded_result, size, max_threads);
    gettimeofday(&end, NULL);
    if (thread_status != 0) {
        fprintf(stderr, "Failed to create or join the worker threads\n");
        goto cleanup;
    }
    printf("Multi-threaded took %lf seconds to execute \n", elapsed_seconds(&begin, &end));

    if (check_if_matrices_differ(result, threaded_result, size) != 0) {
        fprintf(stderr, "Threaded result differ from single core computation, error\n");
        goto cleanup;
    }
    exit_code = EXIT_SUCCESS;

cleanup:
    free_square_matrix(threaded_result, size);
    free_square_matrix(result, size);
    free_square_matrix(array2, size);
    free_square_matrix(array1, size);
    return exit_code;
}