Writing Sequential Files with MPI IO

The Concept

MPI IO can also be used to let every process write sequentially to a separate file of its own.

The Code

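The program takes the base name of the output files (-f) and the number of blocks to write (-l) as input arguments. Each process attempts to write random content to its own file, named after the base name followed by the rank of the process. If any of the processes fails to open or write its file, the processes that did create files delete them.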

$ cd mpi_io/03_writing_sequential/
$ cat src/mkranfiles.c
/******************************************************************************
 *                                                                            *
 *  MPI IO Example - Writing Sequential Files                                 *
 *                                                                            *
 *  Each process attempts to write a specified number of blocks to its        *
 *  own output file.                                                          *
 *                                                                            *
 ******************************************************************************
 *                                                                            *
 *  The original code was written by Gustav at Indiana University in 2003.    *
 *                                                                            *
 *  The current version has been tested/updated by the HPC department at      *
 *  the Norwegian University of Science and Technology in 2011.               *
 *                                                                            *
 ******************************************************************************/
#include <stdio.h>    /* all IO stuff lives here */
#include <stdlib.h>   /* exit lives here */
#include <limits.h>   /* INT_MAX lives here */
#include <unistd.h>   /* getopt lives here */
#include <errno.h>    /* UNIX error handling lives here */
#include <string.h>   /* strcpy lives here */
#include <mpi.h>      /* MPI and MPI-IO live here */
 
/* Rank of the process that parses the command line and prints the summary. */
#define MASTER_RANK 0
/* Hand-rolled boolean; it stays an int so flags can be sent as MPI_INT. */
#define TRUE 1
#define FALSE 0
#define BOOLEAN int
/* Number of integers (not bytes) every process writes per requested block. */
#define BLOCK_SIZE 1048576
/* Bytes per megabyte, used to scale the reported transfer rate. */
#define MBYTE 1048576
/* Prints the usage line; expands in place, so argv must be in scope. */
#define SYNOPSIS printf ("synopsis: %s -f <basename> -l <blocks>\n", argv[0])
 
int main(argc, argv)
     int argc;
     char *argv[];
{
  /* my variables */
 
  int my_rank, pool_size, number_of_blocks = 0, i;
  int number_of_integers, number_of_bytes;
  long long total_number_of_integers, total_number_of_bytes;
  BOOLEAN i_am_the_master = FALSE, input_error = FALSE,
    my_file_open_error = FALSE, file_open_error = FALSE,
    my_write_error = FALSE, write_error = FALSE;
  char *basename = NULL, file_name[BUFSIZ], message[BUFSIZ];
  int basename_length, *junk;
  MPI_File fh;
  double start, finish, io_time, longest_io_time;
  char error_string[BUFSIZ];
  int length_of_error_string, error_class;
  MPI_Status  status;
 
  /* getopt variables */
 
  extern char *optarg;
  int c;
 
  /* error handling variables */
 
  extern int errno;
 
  /* ACTION */
 
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &pool_size);
  if (my_rank == MASTER_RANK) i_am_the_master = TRUE;
 
  if (i_am_the_master) {
 
    /* read the command line */
 
    while ((c = getopt(argc, argv, "f:l:h")) != EOF)
      switch(c) {
      case 'f':
        basename = optarg;
        break;
      case 'l':
        if ((sscanf (optarg, "%d", &number_of_blocks) != 1) ||
            (number_of_blocks < 1))
          input_error = TRUE;
        break;
      case 'h':
        input_error = TRUE;
        break;
      case '?':
        input_error = TRUE;
        break;
      }
 
    /* Check if the command line has initialized both the basename and
     * the number_of_blocks.
     */
 
    if ((basename == NULL) || (number_of_blocks == 0)) input_error = TRUE;
 
    if (input_error)
      SYNOPSIS;
    else {
      basename_length = strlen(basename) + 1;
#ifdef DEBUG
      printf("basename         = %s\n", basename);
      printf("basename_length  = %d\n", basename_length);
      printf("number_of_blocks = %d\n", number_of_blocks);
#endif
    }
  } /* end of if(i_am_the_master) {  } */
 
  /* Transmit the effect of reading the command line to other
     processes. */
 
  MPI_Bcast(&input_error, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD);
 
  if (! input_error) {
 
    /* If we managed to get here, data read from the command line
       is probably OK. */
 
    MPI_Bcast(&number_of_blocks, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD);
    MPI_Bcast(&basename_length, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD);
    if (! i_am_the_master) basename = (char*) malloc(basename_length);
    MPI_Bcast(basename, basename_length, MPI_CHAR, MASTER_RANK, MPI_COMM_WORLD);
 
#ifdef DEBUG
    printf("%3d: basename = %s, number_of_blocks = %d\n",
           my_rank, basename, number_of_blocks);
#endif
 
    /* Allocate space needed to generate the integers */
 
    number_of_integers = number_of_blocks * BLOCK_SIZE;
    number_of_bytes = sizeof(int) * number_of_integers;
    total_number_of_integers = (long long) number_of_integers
      * (long long) pool_size;
    total_number_of_bytes = (long long) number_of_bytes
      * (long long) pool_size;
    junk = (int*) malloc(number_of_bytes);
 
    /* Now every process creates its own file name and attempts
       to open the file. */
 
    sprintf(file_name, "%s.%d", basename, my_rank);
 
#ifdef DEBUG
    printf("%3d: opening file %s\n", my_rank, file_name);
#endif
 
    my_file_open_error =
      MPI_File_open(MPI_COMM_SELF, file_name,
                    MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
 
    if (my_file_open_error != MPI_SUCCESS) {
 
      MPI_Error_class(my_file_open_error, &error_class);
      MPI_Error_string(error_class, error_string, &length_of_error_string);
      printf("%3d: %s\n", my_rank, error_string);
 
      MPI_Error_string(my_file_open_error, error_string,
                       &length_of_error_string);
      printf("%3d: %s\n", my_rank, error_string);
 
      my_file_open_error = TRUE;
 
    }
 
    /* Now we must ALL check that NOBODY had problems
       with opening the file. */
 
    MPI_Allreduce (&my_file_open_error, &file_open_error, 1, MPI_INT,
                   MPI_LOR, MPI_COMM_WORLD);
 
#ifdef DEBUG
    if (i_am_the_master)
      if (file_open_error)
        fprintf(stderr, "problem opening output files\n");
#endif
 
    /* If all files are open for writing, write to them */
 
    if (! file_open_error) {
      srand(28 + my_rank);
      for (i = 0; i < number_of_integers; i++) *(junk + i) = rand();
      start = MPI_Wtime();
      my_write_error =
        MPI_File_write(fh, junk, number_of_integers, MPI_INT, &status);
      if (my_write_error != MPI_SUCCESS) {
        MPI_Error_class(my_write_error, &error_class);
        MPI_Error_string(error_class, error_string, &length_of_error_string);
        printf("%3d: %s\n", my_rank, error_string);
        MPI_Error_string(my_write_error, error_string, &length_of_error_string);
        printf("%3d: %s\n", my_rank, error_string);
        my_write_error = TRUE;
      }
      else {
        finish = MPI_Wtime();
        io_time = finish - start;
        printf("%3d: io_time = %f\n", my_rank, io_time);
      }
 
      /* Check if anybody had problems writing on the file */
 
      MPI_Allreduce (&my_write_error, &write_error, 1, MPI_INT,
                     MPI_LOR, MPI_COMM_WORLD);
 
#ifdef DEBUG
      if (i_am_the_master)
        if (write_error)
          fprintf(stderr, "problem writing on files\n");
#endif
 
    } /* of if(! file_open_error) {  } */
 
    /* Only processes that were successful opening the files
       need do close them here */
 
    if (!my_file_open_error) {
      MPI_File_close(&fh);
#ifdef DEBUG
      printf ("%3d: closed %s\n", my_rank, file_name);
#endif
    }
 
    /* If we have either write errors or file open errors,
       then processes that managed to open their files
       are requested to throw them away */
 
    if (write_error || file_open_error) {
      if (! my_file_open_error) {
        MPI_File_delete(file_name, MPI_INFO_NULL);
#ifdef DEBUG
        printf("%3d: deleted %s\n", my_rank, file_name);
#endif
      }
    }
    else {
      MPI_Reduce(&io_time, &longest_io_time, 1, MPI_DOUBLE, MPI_MAX,
                 MASTER_RANK, MPI_COMM_WORLD);
      if (i_am_the_master) {
        printf("longest_io_time       = %f seconds\n", longest_io_time);
        printf("total_number_of_bytes = %lld\n", total_number_of_bytes);
        printf("transfer rate         = %f MB/s\n",
               total_number_of_bytes / longest_io_time / MBYTE);
      }
 
    } /* end of if (write_error || file_open_error) {} */
        
  } /* end of if(! input_error) {  } */
 
  MPI_Finalize();
  exit(0);
}
Previous Instructions

MPI_Init() and MPI_Finalize(); Used to initialize and finalize the MPI program.

MPI_Comm_rank() and MPI_Comm_size(); Used to find the rank of a process and the total number of processes.

MPI_Bcast(); Used to broadcast the input from the master to the other processes.

MPI_File_open(); Open MPI file.

MPI_Error_class(); Get MPI error class from MPI error.

MPI_Error_string(); Get the error string associated with an error code or error class.

MPI_Allreduce(); Used with MPI_LOR to find if any of the processes had errors opening or writing to their file.

New instructions

MPI_File_delete( file_name, MPI_INFO_NULL ); Used to delete the files that were created when at least one of the processes failed to open or write its file.

Compile & Run

If you have not already done so, obtain all the example code here.

Switch to the Intel compiler (optional, only necessary once in each terminal session)

$ module load intel

Compile the program using

$ make

 Submit the job to the queue

$ make submit

The output files from the program execution are placed in the output folder

$ ls output/
131846.vilje.hpc.ntnu.no.ER  file.0  file.10  file.12  file.14  file.2  file.4  file.6  file.8
131846.vilje.hpc.ntnu.no.OU  file.1  file.11  file.13  file.15  file.3  file.5  file.7  file.9

The standard output is written to the .OU file

$ cat output/*OU
  6: io_time = 1.070781
 14: io_time = 1.100705
  0: io_time = 1.222521
 10: io_time = 1.239067
 11: io_time = 1.319720
  1: io_time = 1.682421
  5: io_time = 1.736790
  3: io_time = 1.797548
  2: io_time = 1.811755
 12: io_time = 1.827409
  9: io_time = 1.878157
 13: io_time = 1.907817
  8: io_time = 1.974429
  7: io_time = 2.016452
 15: io_time = 2.021286
  4: io_time = 2.033217
longest_io_time       = 2.033217 seconds
total_number_of_bytes = 469762048
transfer rate         = 220.340480 MB/s
Scroll to Top