Kao sto rekoh, imam jako malo slobodnog vremena, ali evo danas uspeh da ti "sklopim" nesto. Ispod ti je kod, moguce da ti neki momenti nece biti najjasniji, pa pitaj ako to bude slucaj, a ja cu ti odgovoriti kad ugrabim vremena. Ideja ovog algoritma je da svaki proces dobije odredjeni deo elemenata da proracuna. Uzmimo za primer mnozenje kvadratnih matrica dimenzije 4 u kom ucestvuje 5 procesa. Tada bi procesi dobili da racunaju sledece elemente:
proces 0: (0, 0) (0, 1) (0, 2)
proces 1: (0, 3) (1, 0) (1, 1)
proces 2: (1, 2) (1, 3) (2, 0)
proces 3: (2, 1) (2, 2) (2, 3)
proces 4: (3, 0) (3, 1) (3, 2) (3, 3)
Sto se tice podele matrica, ideja je da svaki proces dobije samo one elemente redova prve matrice i kolona druge matrice koji su mu neophodni za racun elemenata za koje je zaduzen. Zbog toga ce raspodela redova i kolona medju procesima biti ovakva:
proces 0: redovi-0, kolone-0,1,2
proces 1: redovi-0,1 kolone-0,1,2,3 (napomena: kolona 2 mu zapravo ne treba, ali je saljemo zbog ustede na komunikaciji. Naime, MPI ne podrzava da "preskocis" nesto u buffer-u kao sto bi nam u ovom slucaju bilo zgodno jer bi trebalo da saljemo elemente nulte i prve kolone, pa onda da preskocimo drugu i posaljemo trecu. Posto to nije podrzano, za ovako nesto bismo morali da razmenimo dve poruke sto generalno treba izbegavati jer je komunikacija medju procesima ono sto najvise usporava paralelne programe)
proces 2: redovi-1,2 kolone-0,1,2,3 (napomena: kolona 1 je nepotrebna, ali se salje iz istog razloga koji sam navela iznad)
proces 3: redovi-2 kolone-1,2,3
proces 4: redovi-3 kolone-0,1,2,3
Sto se tice poredjenja MPI-a i OpenMP-a, ne bih nijedan od ova dva okarakterisala laksim ili tezim. Imaju drugaciju svrhu, i u skladu s potrebama treba se odluciti za jedan od njih.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/* Print an N x N matrix (flat row-major array) to stdout, framed by a
 * header carrying the matrix name and a trailing separator line. */
void matprint(char *matname, double *mat, long N) {
    long r, c;
    fprintf(stdout, "\n------------%s------------", matname);
    for (r = 0; r < N; r++) {
        fprintf(stdout, "\n");
        for (c = 0; c < N; c++)
            fprintf(stdout, "%f ", mat[r * N + c]);
    }
    fprintf(stdout, "\n--------------------------\n");
}
/* Set every element of an N x N matrix (flat row-major array) to val. */
void matfill(long N, double *mat, double val) {
    long idx;
    for (idx = 0; idx < N * N; idx++)
        mat[idx] = val;
}
/* Compute this rank's contiguous slice of C = A * B.
 *
 * Result elements are numbered 0..N*N-1 in row-major order; each rank owns
 * (N*N)/commSize of them (at least 1), with rank commSize-1 also taking the
 * remainder. `rows` holds the rows of A this rank received (row-major,
 * starting at the first row of its slice); `columns` holds the columns of B
 * it received, each stored contiguously (column c's N values at
 * columns[c*N .. c*N+N-1]).
 *
 * Returns the number of result elements written into `result`. */
int matmul(int commSize, int myRank, long N, double *rows, double *columns, double *result) {
    long chunk, first, last, baseRow, baseCol, elem, idx, k;

    /* Ranks beyond the number of matrix elements contribute one dummy
     * value so the later Gatherv sees a uniform contribution pattern. */
    if (myRank >= N * N) {
        result[0] = 0;
        return 1;
    }

    chunk = (N * N) / commSize;
    if (chunk == 0)
        chunk = 1;
    first = myRank * chunk;
    last = first + chunk;
    if (myRank == commSize - 1)
        last += (N * N) % commSize;

    baseRow = first / N;
    /* The column buffer starts at column first%N only when the whole slice
     * lies within a single row; otherwise all N columns were received. */
    baseCol = (first / N == (last - 1) / N) ? first % N : 0;

    for (elem = first, idx = 0; elem < last; elem++, idx++) {
        double acc = 0.0;
        long r = elem / N - baseRow;
        long c = elem % N - baseCol;
        for (k = 0; k < N; k++)
            acc += rows[r * N + k] * columns[c * N + k];
        result[idx] = acc;
    }
    return last - first;
}
/* Scatter whole rows of mat (the A matrix, row-major) from root rank 0 so
 * that each rank receives exactly the rows containing any of its assigned
 * result elements. Ranks with index >= N*N receive a single dummy element.
 * Returns the number of doubles placed in `rows` for the calling rank. */
int dividerows(int commSize, int myRank, long N, double *mat, double *rows) {
    int rank, first, last, firstRow, lastRow, perRank, myCount;
    int *scounts = (int *)malloc(commSize * sizeof(int));
    int *displs = (int *)malloc(commSize * sizeof(int));

    perRank = (N * N) / commSize;
    if (perRank == 0)
        perRank = 1;

    for (rank = 0; rank < commSize; rank++) {
        if (rank < N * N) {
            first = rank * perRank;
            last = first + perRank - 1;          /* inclusive last element */
            if (rank == commSize - 1)
                last += (N * N) % commSize;      /* last rank absorbs remainder */
            firstRow = first / N;
            lastRow = last / N;
            scounts[rank] = (lastRow - firstRow + 1) * N;
            displs[rank] = firstRow * N;
        } else {
            /* Surplus rank: one dummy element. */
            scounts[rank] = 1;
            displs[rank] = 0;
        }
    }
    MPI_Scatterv(mat, scounts, displs, MPI_DOUBLE, rows, N * N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    myCount = scounts[myRank];
    free(scounts);
    free(displs);
    return myCount;
}
/* Scatter the needed columns of mat (the B matrix, row-major at the root)
 * from rank 0. If a rank's result elements all lie in one row it receives
 * only columns start%N..end%N; otherwise it receives all N columns (cheaper
 * than splitting into two messages — see the accompanying note). Each
 * received column is stored contiguously in `columns`. Ranks >= N*N get one
 * dummy element. Returns the number of doubles placed in `columns`. */
int dividecolumns(int commSize, int myRank, long N, double *mat, double *columns) {
    int i, start, end, startColumn, endColumn, count, columnsForMyRank;
    int *scounts = (int *)malloc(commSize * sizeof(int));
    int *displs = (int *)malloc(commSize * sizeof(int));
    MPI_Datatype col, coltype;

    /* One column of a row-major N x N matrix: N doubles with stride N.
     * Resize the extent to a single double so displacement i addresses
     * column i. Only coltype is used in communication, so only it needs
     * to be committed. */
    MPI_Type_vector(N, 1, N, MPI_DOUBLE, &col);
    MPI_Type_create_resized(col, 0, 1 * sizeof(double), &coltype);
    MPI_Type_commit(&coltype);

    count = (N * N) / commSize;
    if (count == 0) count = 1;
    for (i = 0; i < commSize; i++) {
        if (i < N * N) {
            start = i * count;
            end = start + count - 1;             /* inclusive last element */
            if (i == commSize - 1)
                end += (N * N) % commSize;       /* last rank absorbs remainder */
            if (start / N == end / N) {
                /* Slice within one row: only the spanned columns. */
                startColumn = start % N;
                endColumn = end % N;
            } else {
                /* Slice spans rows: send every column. */
                startColumn = 0;
                endColumn = N - 1;
            }
            scounts[i] = endColumn - startColumn + 1;
            displs[i] = startColumn;
        } else {
            /* Surplus rank: one dummy element. */
            scounts[i] = 1;
            displs[i] = 0;
        }
    }
    MPI_Scatterv(mat, scounts, displs, coltype, columns, N * N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    columnsForMyRank = scounts[myRank] * N;

    /* BUG FIX: the derived datatypes were never released, leaking a pair
     * of MPI type handles on every call. */
    MPI_Type_free(&col);
    MPI_Type_free(&coltype);
    free(scounts);
    free(displs);
    return columnsForMyRank;
}
/* Parallel dense matrix multiplication C = A * B (square, dimension N,
 * taken from argv[1]). Rank 0 initializes A and B, the rows/columns each
 * rank needs are scattered, every rank computes its slice of C, and the
 * slices are gathered back to rank 0, which prints the result. */
int main(int argc, char **argv) {
    long N;
    int i, commSize, myRank, rowsSize, columnsSize, resultCount, currentDispls, count;
    int *rcounts, *displs;
    double *A, *B, *C, *rows, *columns, *localResult;

    int mpiInitSucceeded = MPI_Init(&argc, &argv);
    if (mpiInitSucceeded != MPI_SUCCESS) {
        printf("MPI failed to initialize.\n");
        MPI_Abort(MPI_COMM_WORLD, mpiInitSucceeded);
    }
    MPI_Comm_size(MPI_COMM_WORLD, &commSize);
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    N = 0;
    if (myRank == 0) {
        /* BUG FIX: guard against a missing or non-positive dimension
         * argument instead of dereferencing argv[1] blindly. */
        if (argc < 2 || (N = atol(argv[1])) <= 0) {
            fprintf(stderr, "Usage: %s <matrix-dimension>\n", argv[0]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }
    MPI_Bcast(&N, 1, MPI_LONG, 0, MPI_COMM_WORLD);

    A = (double *) malloc(N * N * sizeof(double));
    B = (double *) malloc(N * N * sizeof(double));
    /* C also holds one dummy element per surplus rank when commSize > N*N. */
    C = (double *) malloc((N * N < commSize ? commSize : N * N) * sizeof(double));
    if (myRank == 0) {
        matfill(N, A, 1.0);
        matfill(N, B, 2.0);
        matfill(N, C, 0.0);
    }

    rows = (double *)malloc(N * N * sizeof(double));
    rowsSize = dividerows(commSize, myRank, N, A, rows);
    columns = (double *)malloc(N * N * sizeof(double));
    columnsSize = dividecolumns(commSize, myRank, N, B, columns);

    localResult = (double *)malloc(N * N * sizeof(double));
    resultCount = matmul(commSize, myRank, N, rows, columns, localResult);

    /* Receive counts must mirror matmul's distribution exactly: every rank
     * below N*N computes `count` elements, the LAST rank (commSize-1)
     * additionally absorbs the remainder, and surplus ranks send one dummy
     * value.
     * BUG FIX: the original tested `i == N*N - 1` for the remainder, but
     * matmul assigns it to rank commSize-1; whenever commSize < N*N with a
     * nonzero remainder (e.g. N=4, commSize=5 from the description above),
     * the gathered counts did not match the amounts actually sent. */
    rcounts = (int *)malloc(commSize * sizeof(int));
    displs = (int *)malloc(commSize * sizeof(int));
    count = (N * N) / commSize;
    if (count == 0) count = 1;
    currentDispls = 0;
    for (i = 0; i < commSize; i++) {
        displs[i] = currentDispls;
        if (i < N * N) {
            rcounts[i] = count;
            if (i == commSize - 1)
                rcounts[i] += (N * N) % commSize;
        } else {
            rcounts[i] = 1;
        }
        currentDispls += rcounts[i];
    }
    MPI_Gatherv(localResult, resultCount, MPI_DOUBLE, C, rcounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (myRank == 0)
        matprint("Resulting matrix", C, N);

    /* BUG FIX: rcounts and displs were previously leaked. */
    free(rcounts);
    free(displs);
    free(A);
    free(B);
    free(C);
    free(rows);
    free(columns);
    free(localResult);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
"Time is a drug. Too much of it kills you." Terry Pratchett