



I have this matrix multiplication code (Fox's algorithm) that works fine with MPI, and I am trying to add OpenMP directives to it so that it makes use of my quad-core resources. Can someone please tell me where and what to add to this code to make it work as a hybrid OpenMP/MPI program?

Here is the relevant part of the code:

typedef struct {
    MPI_Comm comm;                              /* Communicator for entire grid */
    MPI_Comm row_comm;                          /* Communicator for my row      */
    MPI_Comm col_comm;                          /* Communicator for my col      */
    int      my_rank;                           /* My rank in the grid comm     */
} GRID_INFO_T;

#define RandVal rand()%10                       // Value which fills the matrix

LOCAL_MATRIX_T* Local_matrix_allocate(int n_bar);           // Memory for matrix
void Free_local_matrix(LOCAL_MATRIX_T** local_A);           // Free memory
void Generate_matrix(char* title, LOCAL_MATRIX_T* local_A,  // Fills the matrix
void Print_matrix(char* title, LOCAL_MATRIX_T* local_A,     // Prints the matrix on stdout
void Set_to_zero(LOCAL_MATRIX_T* local_A);                  // Fills the matrix with zeroes
void Local_matrix_multiply(LOCAL_MATRIX_T* local_A,         // Local matrix multiply
                           LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);
void Build_matrix_type(LOCAL_MATRIX_T* local_A);            // Creation of new MPI type for matrix

MPI_Datatype local_matrix_mpi_t;                            // New MPI type for matrix
double begin_time, end_time, interval_p, interval_s;
GRID_INFO_T grid;                                           // Grid info of process

void Setup_grid(GRID_INFO_T* grid);                         // Setup the grid info of process
void Fox(int n, GRID_INFO_T* grid, LOCAL_MATRIX_T* local_A, // Fox's algorithm
         LOCAL_MATRIX_T* local_B, LOCAL_MATRIX_T* local_C);

Setup_grid(&grid);                              // Setup the grid info of process
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
local_A = Local_matrix_allocate(n_bar);         // Memory for submatrix A in each process
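From what I have read, the usual way to combine the two is to keep the MPI structure of Fox's algorithm as it is and put the OpenMP directive around the purely local computation, i.e. inside Local_matrix_multiply, while replacing MPI_Init with MPI_Init_thread. Below is a rough sketch of what I think that would look like; the n_bar and entries fields and the Entry macro are placeholders I made up for this post, not my actual LOCAL_MATRIX_T definition:

#include <mpi.h>
#include <omp.h>

/* Placeholder layout -- my real LOCAL_MATRIX_T may differ. */
typedef struct {
    int    n_bar;                               /* order of the local submatrix */
    double entries[512*512];                    /* row-major storage            */
} LOCAL_MATRIX_T;

#define Entry(A,i,j) ((A)->entries[(i)*((A)->n_bar) + (j)])

/* Threaded local multiply: C += A*B on the n_bar x n_bar blocks.
   The OpenMP directive goes here, around pure computation, so that
   every MPI call stays outside the parallel region. */
void Local_matrix_multiply(LOCAL_MATRIX_T* local_A,
                           LOCAL_MATRIX_T* local_B,
                           LOCAL_MATRIX_T* local_C) {
    int i, j, k;
    int n_bar = local_A->n_bar;

    #pragma omp parallel for private(j, k) schedule(static)
    for (i = 0; i < n_bar; i++)
        for (j = 0; j < n_bar; j++)
            for (k = 0; k < n_bar; k++)
                Entry(local_C, i, j) +=
                    Entry(local_A, i, k) * Entry(local_B, k, j);
}

int main(int argc, char* argv[]) {
    /* Request a threaded MPI library instead of plain MPI_Init;
       FUNNELED is enough when only the main thread calls MPI. */
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    if (provided < MPI_THREAD_FUNNELED)
        MPI_Abort(MPI_COMM_WORLD, 1);

    /* ... unchanged Fox-algorithm driver: Setup_grid, Generate_matrix,
       Fox(n, &grid, local_A, local_B, local_C), timing, output ... */

    MPI_Finalize();
    return 0;
}

I would then compile with something like mpicc -fopenmp and run with OMP_NUM_THREADS=4 mpirun -np 4 ./fox (Fox's algorithm still needs a square number of MPI processes). Is Local_matrix_multiply the right place for the directives, or do I need to change more than this?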
