#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef DO_LOG
/* We don't really need the mpe.h file, and if we are combining this
   with -mpilog, we'll get the correct libraries */
/* #include "mpe.h" */
#else
#define MPE_Log_event( a, b, c )
#define MPE_Describe_state(a,b,c,d)
#endif
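
/* Compute performs cnt passes of a simple floating-point operation
   (sqrt of sqrt) over the size entries of databuf; it stands in for the
   application's computation phase and is bracketed by MPE log events
   (which compile away when DO_LOG is not defined). */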
void Compute( int cnt, int size, double *databuf )
{
    int i, j;

    MPE_Log_event( 1, 0, "" );
    for (j = 0; j < cnt; j++) {
        for (i = 0; i < size; i++)
            databuf[i] = sqrt( sqrt( databuf[i] ) );
    }
    MPE_Log_event( 2, 0, "" );
}
int main( int argc, char **argv )
{
    int rank, size, left_nbr, right_nbr;
    int periods = 0;      /* the single dimension is non-periodic */
    int reorder = 1;      /* let MPI reorder ranks in the new communicator */
    int i, k, n, m, args[2];
    double *rbuf, *sbuf, *databuf;
    MPI_Comm comm;
    MPI_Request r_recv, r_send, r[2];
    MPI_Status status, statuses[2];
    double t_comm, t_compute, t_both, t1;

    MPI_Init( &argc, &argv );

    /* Get n and m from the command line; only rank 0 parses the arguments */
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    if (rank == 0) {
        /* Add a compute state for MPE logging */
        MPE_Describe_state( 1, 2, "Compute", "purple:vlines3" );
        /* Set the defaults */
        args[0] = 20000;
        args[1] = 20000;
        for (i = 1; i < argc; i++) {
            if (!argv[i]) continue;
            if (strcmp( argv[i], "-n" ) == 0 && i + 1 < argc) {
                args[0] = atoi( argv[i+1] );
                i++;
            }
            else if (strcmp( argv[i], "-m" ) == 0 && i + 1 < argc) {
                args[1] = atoi( argv[i+1] );
                i++;
            }
        }
    }
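
    /* args[0] is n, the message length in doubles; args[1] is m, the
       number of entries processed in each call to Compute */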
    MPI_Bcast( args, 2, MPI_INT, 0, MPI_COMM_WORLD );
    n = args[0];
    m = args[1];

    /* Create a "good" communicator and get the neighbors (non-periodic) */
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Cart_create( MPI_COMM_WORLD, 1, &size, &periods, reorder, &comm );
    MPI_Cart_shift( comm, 0, 1, &left_nbr, &right_nbr );
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
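
    /* Because the boundaries are non-periodic, the ranks at either end
       receive MPI_PROC_NULL as a neighbor from MPI_Cart_shift, and
       communication with MPI_PROC_NULL completes immediately as a no-op. */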

    /* Get the buffers */
    rbuf    = (double *) malloc( n * sizeof(double) );
    sbuf    = (double *) malloc( n * sizeof(double) );
    databuf = (double *) malloc( m * sizeof(double) );
    if (!rbuf || !sbuf) {
        fprintf( stderr, "Unable to allocate buffers of size %d\n", n );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!databuf) {
        fprintf( stderr, "Unable to allocate buffer of size %d\n", m );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    /* Initialize the buffers; sbuf is filled so that we never send
       uninitialized memory */
    for (k = 0; k < n; k++) {
        sbuf[k] = 0.0;
    }
    for (k = 0; k < m; k++) {
        databuf[k] = 1000.0;
    }

    /* Make sure that the data has cycled through the cache first */
    Compute( 1, m, databuf );
    t1 = MPI_Wtime();
    Compute( 1, m, databuf );
    t_compute = MPI_Wtime() - t1;
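
    /* t_compute is the time for a single pass of Compute over the m
       entries, with the data already resident in cache. */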

    /* For comparison, time an Irecv/Isend/Waitall with no computation */
    MPI_Barrier( comm );
    t1 = MPI_Wtime();
    MPI_Irecv( rbuf, n, MPI_DOUBLE, left_nbr, 5, comm, &r[0] );
    MPI_Isend( sbuf, n, MPI_DOUBLE, right_nbr, 5, comm, &r[1] );
    MPI_Waitall( 2, r, statuses );
    t_comm = MPI_Wtime() - t1;
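
    /* t_comm is the time for one nonblocking exchange with nothing
       overlapped. */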

    /* Time the communication overlapped with the computation: post the
       nonblocking receive and send, do the computation, then wait. */
    MPI_Barrier( comm );
    t1 = MPI_Wtime();
    r_recv = MPI_REQUEST_NULL;
    for (k = 0; k < 3; k++) {
        /* Wait on the previous recv (a no-op for the null request on the
           first iteration) */
        MPI_Wait( &r_recv, &status );
        MPI_Irecv( rbuf, n, MPI_DOUBLE, left_nbr, k, comm, &r_recv );
        MPI_Isend( sbuf, n, MPI_DOUBLE, right_nbr, k, comm, &r_send );
        Compute( 1, m, databuf );
        MPI_Wait( &r_send, &status );
    }
    MPI_Wait( &r_recv, &status );
    t_both = MPI_Wtime() - t1;
    t_both /= 3.0;      /* average over the three iterations */
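
    /* If the MPI implementation can overlap communication with
       computation, T_both should approach max(T_comm, T_compute);
       without overlap it approaches T_comm + T_compute. */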
    if (rank == 0) {
        printf( "For n = %d, m = %d, T_comm = %f, T_compute = %f, sum = %f, T_both = %f\n",
                n, m, t_comm, t_compute, t_comm + t_compute, t_both );
    }

    free( rbuf );
    free( sbuf );
    free( databuf );
    MPI_Finalize( );
    return 0;
}