1 /* 2 * 3 * This is the SyncIO library that uses MPI-IO collective functions to 4 * implement a flexible I/O checkpoint solution for a large number of 5 * processors. 6 * 7 * Previous developer: Ning Liu (liun2@cs.rpi.edu) 8 * Jing Fu (fuj@cs.rpi.edu) 9 * Current developers: Michel Rasquin (Michel.Rasquin@colorado.edu), 10 * Ben Matthews (benjamin.a.matthews@colorado.edu) 11 * 12 */ 13 14 #include <map> 15 #include <vector> 16 #include <string> 17 #include <string.h> 18 #include <ctype.h> 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <math.h> 22 #include <sstream> 23 #include "phastaIO.h" 24 #include "mpi.h" 25 #include "phiotmrc.h" 26 27 #define VERSION_INFO_HEADER_SIZE 8192 28 #define DB_HEADER_SIZE 1024 29 #define ONE_MEGABYTE 1048576 30 #define TWO_MEGABYTE 2097152 31 #define ENDIAN_TEST_NUMBER 12180 // Troy's Zip Code!! 32 #define MAX_PHASTA_FILES 64 33 #define MAX_PHASTA_FILE_NAME_LENGTH 1024 34 #define MAX_FIELDS_NUMBER ((VERSION_INFO_HEADER_SIZE/MAX_FIELDS_NAME_LENGTH)-4) // The meta data include - MPI_IO_Tag, nFields, nFields*names of the fields, nppf 35 // -3 for MPI_IO_Tag, nFields and nppf, -4 for extra security (former nFiles) 36 #define MAX_FIELDS_NAME_LENGTH 128 37 #define DefaultMHSize (4*ONE_MEGABYTE) 38 //#define DefaultMHSize (8350) //For test 39 #define LATEST_WRITE_VERSION 1 40 #define inv1024sq 953.674316406e-9 // = 1/1024/1024 41 int MasterHeaderSize = -1; 42 43 bool PRINT_PERF = false; // default print no perf results 44 int irank = -1; // global rank, should never be manually manipulated 45 int mysize = -1; 46 47 // Static variables are bad but used here to store the subcommunicator and associated variables 48 // Prevent MPI_Comm_split to be called more than once, especially on BGQ with the V1R2M1 driver (leak detected in MPI_Comm_split - IBM working on it) 49 static int s_assign_local_comm = 0; 50 static MPI_Comm s_local_comm; 51 static int s_local_size = -1; 52 static int s_local_rank = -1; 53 54 //unsigned long long pool_align = 8; 55 //unsigned long long mem_address; 56 57 // the following defines are for debug printf 58 #define PHASTAIO_PREFIX "phastaIO debug: " 59 #define PHASTAIO_DEBUG 0 //default to not print any debugging info 60 61 #if PHASTAIO_DEBUG 62 #define phprintf( s, arg...) printf(PHASTAIO_PREFIX s "\n", ##arg) 63 #define phprintf_0( s, arg...) do{ \ 64 MPI_Comm_rank(MPI_COMM_WORLD, &irank); \ 65 if(irank == 0){ \ 66 printf(PHASTAIO_PREFIX "irank=0: " s "\n", ##arg); \ 67 } \ 68 } while(0) 69 #else 70 #define phprintf( s, arg...) 71 #define phprintf_0( s, arg...) 72 #endif 73 74 enum PhastaIO_Errors 75 { 76 MAX_PHASTA_FILES_EXCEEDED = -1, 77 UNABLE_TO_OPEN_FILE = -2, 78 NOT_A_MPI_FILE = -3, 79 GPID_EXCEEDED = -4, 80 DATA_TYPE_ILLEGAL = -5, 81 }; 82 83 using namespace std; 84 85 namespace{ 86 87 typedef map<int, char*> mic; 88 mic LastHeaderKey; 89 vector< FILE* > fileArray; 90 vector< bool > byte_order; 91 vector< int > header_type; 92 int DataSize=0; 93 bool LastHeaderNotFound = false; 94 bool Wrong_Endian = false ; 95 bool Strict_Error = false ; 96 bool binary_format = true; 97 98 /***********************************************************************/ 99 /***************** NEW PHASTA IO CODE STARTS HERE **********************/ 100 /***********************************************************************/ 101 102 typedef struct 103 { 104 char filename[MAX_PHASTA_FILE_NAME_LENGTH]; /* defafults to 1024 */ 105 unsigned long long my_offset; 106 unsigned long long next_start_address; 107 unsigned long long **my_offset_table; 108 unsigned long long **my_read_table; 109 110 double * double_chunk; 111 double * read_double_chunk; 112 113 int field_count; 114 int part_count; 115 int read_field_count; 116 int read_part_count; 117 int GPid; 118 int start_id; 119 120 int mhsize; 121 122 int myrank; 123 int numprocs; 124 int local_myrank; 125 int local_numprocs; 126 127 int nppp; 128 int nPPF; 129 int nFiles; 130 int nFields; 131 132 int * int_chunk; 133 int * read_int_chunk; 134 135 int Wrong_Endian; /* default to false */ 136 char * master_header; 137 MPI_File file_handle; 138 MPI_Comm local_comm; 139 } phastaio_file_t; 140 141 typedef struct 142 { 143 //unsigned long long my_offset; 144 //unsigned long long **offset_table; 145 //int fileID; 146 int nppf, nfields; 147 //int GPid; 148 //int read_field_count; 149 char * masterHeader; 150 }serial_file; 151 152 serial_file *SerialFile; 153 phastaio_file_t *PhastaIOActiveFiles[MAX_PHASTA_FILES]; 154 int PhastaIONextActiveIndex = 0; /* indicates next index to allocate */ 155 156 // the caller has the responsibility to delete the returned string 157 // TODO: StringStipper("nbc value? ") returns NULL? 158 char* 159 StringStripper( const char istring[] ) { 160 161 int length = strlen( istring ); 162 163 //char* dest = new char [ length + 1 ]; 164 char* dest = (char *)malloc( length + 1 ); 165 166 //char * dest; 167 //dest = (char *)malloc( length + 1 + pool_align ); 168 //if (dest & 0x0F != 0) printf("not aligned!!!!\n"); 169 //mem_address = (long long )dest; 170 //if( mem_address & (pool_align -1) ) 171 // dest += pool_align - (mem_address & (pool_align -1)); 172 173 strcpy( dest, istring ); 174 dest[ length ] = '\0'; 175 176 if ( char* p = strpbrk( dest, " ") ) 177 *p = '\0'; 178 179 return dest; 180 } 181 182 inline int 183 cscompare( const char teststring[], 184 const char targetstring[] ) { 185 186 char* s1 = const_cast<char*>(teststring); 187 char* s2 = const_cast<char*>(targetstring); 188 189 while( *s1 == ' ') s1++; 190 while( *s2 == ' ') s2++; 191 while( ( *s1 ) 192 && ( *s2 ) 193 && ( *s2 != '?') 194 && ( tolower( *s1 )==tolower( *s2 ) ) ) { 195 s1++; 196 s2++; 197 while( *s1 == ' ') s1++; 198 while( *s2 == ' ') s2++; 199 } 200 if ( !( *s1 ) || ( *s1 == '?') ) return 1; 201 else return 0; 202 } 203 204 inline void 205 isBinary( const char iotype[] ) { 206 207 char* fname = StringStripper( iotype ); 208 if ( cscompare( fname, "binary" ) ) binary_format = true; 209 else binary_format = false; 210 free (fname); 211 212 } 213 214 inline size_t 215 typeSize( const char typestring[] ) { 216 217 char* ts1 = StringStripper( typestring ); 218 219 if ( cscompare( "integer", ts1 ) ) { 220 free (ts1); 221 return sizeof(int); 222 } else if ( cscompare( "double", ts1 ) ) { 223 free (ts1); 224 return sizeof( double ); 225 } else { 226 free (ts1); 227 fprintf(stderr,"unknown type : %s\n",ts1); 228 return 0; 229 } 230 } 231 232 int 233 readHeader( FILE* fileObject, 234 const char phrase[], 235 int* params, 236 int expect ) { 237 238 char* text_header; 239 char* token; 240 char Line[1024]; 241 char junk; 242 bool FOUND = false ; 243 int real_length; 244 int skip_size, integer_value; 245 int rewind_count=0; 246 247 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 248 rewind( fileObject ); 249 clearerr( fileObject ); 250 rewind_count++; 251 fgets( Line, 1024, fileObject ); 252 } 253 254 while( !FOUND && ( rewind_count < 2 ) ) { 255 if ( ( Line[0] != '\n' ) && ( real_length = strcspn( Line, "#" )) ){ 256 //text_header = new char [ real_length + 1 ]; 257 text_header = (char *)malloc( real_length + 1 ); 258 //text_header = (char *)malloc( real_length + 1 + pool_align ); 259 //mem_address = (long long )text_header; 260 //if( mem_address & (pool_align -1) ) 261 // text_header += pool_align - (mem_address & (pool_align -1)); 262 263 strncpy( text_header, Line, real_length ); 264 text_header[ real_length ] =static_cast<char>(NULL); 265 token = strtok ( text_header, ":" ); 266 //if( cscompare( phrase , token ) ) { 267 // Double comparison required because different fields can still start 268 // with the same name for mixed meshes (nbc code, nbc values, etc). 269 // Would be easy to fix cscompare instead but would it break sth else? 270 if( cscompare( phrase , token ) && cscompare( token , phrase ) ) { 271 FOUND = true ; 272 token = strtok( NULL, " ,;<>" ); 273 skip_size = atoi( token ); 274 int i; 275 for( i=0; i < expect && ( token = strtok( NULL," ,;<>") ); i++) { 276 params[i] = atoi( token ); 277 } 278 if ( i < expect ) { 279 fprintf(stderr,"Aloha Expected # of ints not found for: %s\n",phrase ); 280 } 281 } else if ( cscompare(token,"byteorder magic number") ) { 282 if ( binary_format ) { 283 fread((void*)&integer_value,sizeof(int),1,fileObject); 284 fread( &junk, sizeof(char), 1 , fileObject ); 285 if ( 362436 != integer_value ) Wrong_Endian = true; 286 } else{ 287 fscanf(fileObject, "%d\n", &integer_value ); 288 } 289 } else { 290 /* some other header, so just skip over */ 291 token = strtok( NULL, " ,;<>" ); 292 skip_size = atoi( token ); 293 if ( binary_format) 294 fseek( fileObject, skip_size, SEEK_CUR ); 295 else 296 for( int gama=0; gama < skip_size; gama++ ) 297 fgets( Line, 1024, fileObject ); 298 } 299 free (text_header); 300 } // end of if before while loop 301 302 if ( !FOUND ) 303 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 304 rewind( fileObject ); 305 clearerr( fileObject ); 306 rewind_count++; 307 fgets( Line, 1024, fileObject ); 308 } 309 } 310 311 if ( !FOUND ) { 312 //fprintf(stderr, "Error: Could not find: %s\n", phrase); 313 if(irank == 0) printf("WARNING: Could not find: %s\n", phrase); 314 return 1; 315 } 316 return 0; 317 } 318 319 } // end unnamed namespace 320 321 322 // begin of publicly visible functions 323 324 /** 325 * This function takes a long long pointer and assign (start) phiotmrc value to it 326 */ 327 //void startTimer(unsigned long long* start) { 328 void startTimer(double* start) { 329 330 if( !PRINT_PERF ) return; 331 332 MPI_Barrier(MPI_COMM_WORLD); 333 *start = phiotmrc(); 334 } 335 336 /** 337 * This function takes a long long pointer and assign (end) phiotmrc value to it 338 */ 339 void endTimer(double* end) { 340 341 if( !PRINT_PERF ) return; 342 343 *end = phiotmrc(); 344 MPI_Barrier(MPI_COMM_WORLD); 345 } 346 347 /** 348 * choose to print some performance results (or not) according to 349 * the PRINT_PERF macro 350 */ 351 void printPerf( 352 const char* func_name, 353 double start, 354 double end, 355 unsigned long long datasize, 356 int printdatainfo, 357 const char* extra_msg) { 358 359 if( !PRINT_PERF ) return; 360 361 unsigned long long data_size = datasize; 362 363 double time = end - start; 364 365 unsigned long long isizemin,isizemax,isizetot; 366 double sizemin,sizemax,sizeavg,sizetot,rate; 367 double tmin, tmax, tavg, ttot; 368 369 MPI_Allreduce(&time, &tmin,1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 370 MPI_Allreduce(&time, &tmax,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 371 MPI_Allreduce(&time, &ttot,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 372 tavg = ttot/mysize; 373 374 if(irank == 0) { 375 if ( PhastaIONextActiveIndex == 0 ) printf("** 1PFPP "); 376 else printf("** syncIO "); 377 printf("%s(): Tmax = %f sec, Tmin = %f sec, Tavg = %f sec", func_name, tmax, tmin, tavg); 378 } 379 380 if(printdatainfo == 1) { // if printdatainfo ==1, compute I/O rate and block size 381 MPI_Allreduce(&data_size,&isizemin,1,MPI_LONG_LONG_INT,MPI_MIN,MPI_COMM_WORLD); 382 MPI_Allreduce(&data_size,&isizemax,1,MPI_LONG_LONG_INT,MPI_MAX,MPI_COMM_WORLD); 383 MPI_Allreduce(&data_size,&isizetot,1,MPI_LONG_LONG_INT,MPI_SUM,MPI_COMM_WORLD); 384 385 sizemin=(double)(isizemin*inv1024sq); 386 sizemax=(double)(isizemax*inv1024sq); 387 sizetot=(double)(isizetot*inv1024sq); 388 sizeavg=(double)(1.0*sizetot/mysize); 389 rate=(double)(1.0*sizetot/tmax); 390 391 if( irank == 0) { 392 printf(", Rate = %f MB/s [%s] \n \t\t\t block size: Min= %f MB; Max= %f MB; Avg= %f MB; Tot= %f MB\n", rate, extra_msg, sizemin,sizemax,sizeavg,sizetot); 393 } 394 } 395 else { 396 if(irank == 0) { 397 printf(" \n"); 398 //printf(" (%s) \n", extra_msg); 399 } 400 } 401 } 402 403 /** 404 * This function is normally called at the beginning of a read operation, before 405 * init function. 406 * This function (uses rank 0) reads out nfields, nppf, master header size, 407 * endianess and allocates for masterHeader string. 408 * These values are essential for following read operations. Rank 0 will bcast 409 * these values to other ranks in the commm world 410 * 411 * If the file set is of old POSIX format, it would throw error and exit 412 */ 413 void queryphmpiio(const char filename[],int *nfields, int *nppf) 414 { 415 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 416 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 417 418 if(irank == 0) { 419 FILE * fileHandle; 420 char* fname = StringStripper( filename ); 421 422 fileHandle = fopen (fname,"rb"); 423 if (fileHandle == NULL ) { 424 printf("\nError: File %s doesn't exist! Please check!\n",fname); 425 } 426 else { 427 SerialFile =(serial_file *)calloc( 1, sizeof( serial_file) ); 428 //SerialFile = (serial_file *)malloc( sizeof( serial_file ) + pool_align ); 429 //mem_address = (long long )SerialFile; 430 //if( mem_address & (pool_align -1) ) 431 // SerialFile += pool_align - (mem_address & (pool_align -1)); 432 433 //SerialFile->masterHeader = (char *)malloc(MasterHeaderSize); 434 //int meta_size_limit = 4*ONE_MEGABYTE; 435 //int meta_size_limit = (4+ MAX_FIELDS_NUMBER ) * MAX_FIELDS_NAME_LENGTH; 436 int meta_size_limit = VERSION_INFO_HEADER_SIZE; 437 SerialFile->masterHeader = (char *)malloc( meta_size_limit ); 438 //SerialFile->masterHeader = (char *)malloc( meta_size_limit + pool_align ); 439 //mem_address = (long long )SerialFile; 440 //if( mem_address & (pool_align -1) ) 441 // SerialFile->masterHeader += pool_align - (mem_address & (pool_align -1)); 442 443 fread(SerialFile->masterHeader, 1, meta_size_limit, fileHandle); 444 445 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 446 char version[MAX_FIELDS_NAME_LENGTH/4]; 447 int mhsize; 448 char * token; 449 int magic_number; 450 451 memcpy( read_out_tag, 452 SerialFile->masterHeader, 453 MAX_FIELDS_NAME_LENGTH-1 ); 454 455 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) { 456 // Test endianess ... 457 memcpy (&magic_number, 458 SerialFile->masterHeader + sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 459 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 460 461 if ( magic_number != ENDIAN_TEST_NUMBER ) { 462 printf("Endian is different!\n"); 463 // Will do swap later 464 } 465 466 // test version, old version, default masterheader size is 4M 467 // newer version masterheader size is read from first line 468 memcpy(version, 469 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/2, 470 MAX_FIELDS_NAME_LENGTH/4 - 1); //TODO: why -1? 471 472 if( cscompare ("version",version) ) { 473 // if there is "version" tag in the file, then it is newer format 474 // read master header size from here, otherwise use default 475 // Note: if version is "1", we know mhsize is at 3/4 place... 476 477 token = strtok(version, ":"); 478 token = strtok(NULL, " ,;<>" ); 479 int iversion = atoi(token); 480 481 if( iversion == 1) { 482 memcpy( &mhsize, 483 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/4*3 + sizeof("mhsize : ")-1, 484 sizeof(int)); 485 if ( magic_number != ENDIAN_TEST_NUMBER ) 486 SwapArrayByteOrder(&mhsize, sizeof(int), 1); 487 488 if( mhsize > DefaultMHSize ) { 489 //if actual headersize is larger than default, let's re-read 490 free(SerialFile->masterHeader); 491 SerialFile->masterHeader = (char *)malloc(mhsize); 492 fseek(fileHandle, 0, SEEK_SET); // reset the file stream position 493 fread(SerialFile->masterHeader,1,mhsize,fileHandle); 494 } 495 } 496 //TODO: check if this is a valid int?? 497 MasterHeaderSize = mhsize; 498 } 499 else { // else it's version 0's format w/o version tag, implicating MHSize=4M 500 MasterHeaderSize = DefaultMHSize; 501 } 502 503 memcpy( read_out_tag, 504 SerialFile->masterHeader+MAX_FIELDS_NAME_LENGTH+1, 505 MAX_FIELDS_NAME_LENGTH ); //TODO: why +1 506 507 // Read in # fields ... 508 token = strtok( read_out_tag, ":" ); 509 token = strtok( NULL," ,;<>" ); 510 *nfields = atoi( token ); 511 if ( *nfields > MAX_FIELDS_NUMBER) { 512 printf("Error queryphmpiio: nfields is larger than MAX_FIELDS_NUMBER!\n"); 513 } 514 SerialFile->nfields=*nfields; //TODO: sanity check of this int? 515 516 memcpy( read_out_tag, 517 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH * 2 518 + *nfields * MAX_FIELDS_NAME_LENGTH, 519 MAX_FIELDS_NAME_LENGTH); 520 521 token = strtok( read_out_tag, ":" ); 522 token = strtok( NULL," ,;<>" ); 523 *nppf = atoi( token ); 524 SerialFile->nppf=*nppf; //TODO: sanity check of int 525 } // end of if("MPI_IO_TAG") 526 else { 527 printf("Error queryphmpiio: The file you opened is not of syncIO new format, please check! read_out_tag = %s\n",read_out_tag); 528 exit(1); 529 } 530 fclose(fileHandle); 531 free(SerialFile->masterHeader); 532 free(SerialFile); 533 } //end of else 534 free(fname); 535 } 536 537 // Bcast value to every one 538 MPI_Bcast( nfields, 1, MPI_INT, 0, MPI_COMM_WORLD ); 539 MPI_Bcast( nppf, 1, MPI_INT, 0, MPI_COMM_WORLD ); 540 MPI_Bcast( &MasterHeaderSize, 1, MPI_INT, 0, MPI_COMM_WORLD ); 541 phprintf("Info queryphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 542 } 543 544 /** 545 * This function computes the right master header size (round to size of 2^n). 546 * This is only needed for file format version 1 in "write" mode. 547 */ 548 int computeMHSize(int nfields, int nppf, int version) { 549 int mhsize; 550 if(version == 1) { 551 //int meta_info_size = (2+nfields+1) * MAX_FIELDS_NAME_LENGTH; // 2 is MPI_IO_TAG and nFields, the others 1 is nppf 552 int meta_info_size = VERSION_INFO_HEADER_SIZE; 553 int actual_size = nfields * nppf * sizeof(long long) + meta_info_size; 554 //printf("actual_size = %d, offset table size = %d\n", actual_size, nfields * nppf * sizeof(long long)); 555 if (actual_size > DefaultMHSize) { 556 mhsize = (int) ceil( (double) actual_size/DefaultMHSize); // it's rounded to ceiling of this value 557 mhsize *= DefaultMHSize; 558 } 559 else { 560 mhsize = DefaultMHSize; 561 } 562 } 563 return mhsize; 564 } 565 566 /** 567 * Computes correct color of a rank according to number of files. 568 */ 569 extern "C" int computeColor( int myrank, int numprocs, int nfiles) { 570 int color = 571 (int)(myrank / (numprocs / nfiles)); 572 return color; 573 } 574 575 576 /** 577 * Check the file descriptor. 578 */ 579 void checkFileDescriptor(const char fctname[], 580 int* fileDescriptor ) { 581 if ( *fileDescriptor < 0 ) { 582 printf("Error: File descriptor = %d in %s\n",*fileDescriptor,fctname); 583 exit(1); 584 } 585 } 586 587 /** 588 * Initialize the file struct members and allocate space for file struct 589 * buffers. 590 * 591 * Note: this function is only called when we are using new format. Old POSIX 592 * format should skip this routine and call openfile() directly instead. 593 */ 594 int initphmpiio( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[]) 595 { 596 // we init irank again in case query not called (e.g. syncIO write case) 597 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 598 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 599 600 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 601 602 double timer_start, timer_end; 603 startTimer(&timer_start); 604 605 char* imode = StringStripper( mode ); 606 607 // Note: if it's read, we presume query was called prior to init and 608 // MasterHeaderSize is already set to correct value from parsing header 609 // otherwise it's write then it needs some computation to be set 610 if ( cscompare( "read", imode ) ) { 611 // do nothing 612 } 613 else if( cscompare( "write", imode ) ) { 614 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 615 } 616 else { 617 printf("Error initphmpiio: can't recognize the mode %s", imode); 618 exit(1); 619 } 620 free ( imode ); 621 622 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 623 624 int i, j; 625 626 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 627 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 628 endTimer(&timer_end); 629 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 630 return MAX_PHASTA_FILES_EXCEEDED; 631 } 632 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 633 // { 634 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 635 // { 636 // PhastaIOActiveFiles[i] = NULL; 637 // } 638 // } 639 640 641 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 642 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 643 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 644 //if( mem_address & (pool_align -1) ) 645 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 646 647 i = PhastaIONextActiveIndex; 648 PhastaIONextActiveIndex++; 649 650 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 651 652 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 653 654 PhastaIOActiveFiles[i]->Wrong_Endian = false; 655 656 PhastaIOActiveFiles[i]->nFields = *nfields; 657 PhastaIOActiveFiles[i]->nPPF = *nppf; 658 PhastaIOActiveFiles[i]->nFiles = *nfiles; 659 MPI_Comm_rank(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->myrank)); 660 MPI_Comm_size(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->numprocs)); 661 662 663 if( *nfiles > 1 ) { // split the ranks according to each mpiio file 664 665 if ( s_assign_local_comm == 0) { // call mpi_comm_split for the first (and only) time 666 667 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Building subcommunicator\n"); 668 669 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 670 MPI_Comm_split(MPI_COMM_WORLD, 671 color, 672 PhastaIOActiveFiles[i]->myrank, 673 &(PhastaIOActiveFiles[i]->local_comm)); 674 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 675 &(PhastaIOActiveFiles[i]->local_numprocs)); 676 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 677 &(PhastaIOActiveFiles[i]->local_myrank)); 678 679 // back up now these variables so that we do not need to call comm_split again 680 s_local_comm = PhastaIOActiveFiles[i]->local_comm; 681 s_local_size = PhastaIOActiveFiles[i]->local_numprocs; 682 s_local_rank = PhastaIOActiveFiles[i]->local_myrank; 683 s_assign_local_comm = 1; 684 } 685 else { // recycle the subcommunicator 686 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Recycling subcommunicator\n"); 687 PhastaIOActiveFiles[i]->local_comm = s_local_comm; 688 PhastaIOActiveFiles[i]->local_numprocs = s_local_size; 689 PhastaIOActiveFiles[i]->local_myrank = s_local_rank; 690 } 691 } 692 else { // *nfiles == 1 here - no need to call mpi_comm_split here 693 694 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Bypassing subcommunicator\n"); 695 PhastaIOActiveFiles[i]->local_comm = MPI_COMM_WORLD; 696 PhastaIOActiveFiles[i]->local_numprocs = PhastaIOActiveFiles[i]->numprocs; 697 PhastaIOActiveFiles[i]->local_myrank = PhastaIOActiveFiles[i]->myrank; 698 699 } 700 701 PhastaIOActiveFiles[i]->nppp = 702 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 703 704 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 705 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 706 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 707 708 PhastaIOActiveFiles[i]->my_offset_table = 709 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 710 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 711 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 712 //if( mem_address & (pool_align -1) ) 713 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 714 715 PhastaIOActiveFiles[i]->my_read_table = 716 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 717 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 718 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 719 //if( mem_address & (pool_align -1) ) 720 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 721 722 for (j=0; j<*nfields; j++) 723 { 724 PhastaIOActiveFiles[i]->my_offset_table[j] = 725 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 726 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 727 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 728 //if( mem_address & (pool_align -1) ) 729 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 730 731 PhastaIOActiveFiles[i]->my_read_table[j] = 732 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 733 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 734 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 735 //if( mem_address & (pool_align -1) ) 736 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 737 } 738 *filehandle = i; 739 740 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 741 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 742 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 743 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 744 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 745 746 /* 747 PhastaIOActiveFiles[i]->master_header = 748 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 749 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 750 if( mem_address & (pool_align -1) ) 751 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 752 753 PhastaIOActiveFiles[i]->double_chunk = 754 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 755 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 756 if( mem_address & (pool_align -1) ) 757 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 758 759 PhastaIOActiveFiles[i]->int_chunk = 760 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 761 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 762 if( mem_address & (pool_align -1) ) 763 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 764 765 PhastaIOActiveFiles[i]->read_double_chunk = 766 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 767 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 768 if( mem_address & (pool_align -1) ) 769 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 770 771 PhastaIOActiveFiles[i]->read_int_chunk = 772 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 773 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 774 if( mem_address & (pool_align -1) ) 775 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 776 */ 777 778 // Time monitoring 779 endTimer(&timer_end); 780 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 781 782 phprintf_0("Info initphmpiio: quiting function"); 783 784 return i; 785 } 786 787 /** 788 * Destruct the file struct and free buffers allocated in init function. 789 */ 790 void finalizephmpiio( int *fileDescriptor ) 791 { 792 double timer_start, timer_end; 793 startTimer(&timer_start); 794 795 int i, j; 796 i = *fileDescriptor; 797 //PhastaIONextActiveIndex--; 798 799 /* //free the offset table for this phasta file */ 800 //for(j=0; j<MAX_FIELDS_NUMBER; j++) //Danger: undefined behavior for my_*_table.[j] not allocated or not initialized to NULL 801 for(j=0; j<PhastaIOActiveFiles[i]->nFields; j++) 802 { 803 free( PhastaIOActiveFiles[i]->my_offset_table[j]); 804 free( PhastaIOActiveFiles[i]->my_read_table[j]); 805 } 806 free ( PhastaIOActiveFiles[i]->my_offset_table ); 807 free ( PhastaIOActiveFiles[i]->my_read_table ); 808 free ( PhastaIOActiveFiles[i]->master_header ); 809 free ( PhastaIOActiveFiles[i]->double_chunk ); 810 free ( PhastaIOActiveFiles[i]->int_chunk ); 811 free ( PhastaIOActiveFiles[i]->read_double_chunk ); 812 free ( PhastaIOActiveFiles[i]->read_int_chunk ); 813 814 if( PhastaIOActiveFiles[i]->nFiles > 1 && s_assign_local_comm ) { // the comm was split 815 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Freeing subcommunicator\n"); 816 s_assign_local_comm = 0; 817 MPI_Comm_free(&(PhastaIOActiveFiles[i]->local_comm)); 818 } 819 820 free( PhastaIOActiveFiles[i]); 821 822 endTimer(&timer_end); 823 printPerf("finalizempiio", timer_start, timer_end, 0, 0, ""); 824 825 PhastaIONextActiveIndex--; 826 } 827 828 829 /** 830 * Special init for M2N in order to create a subcommunicator for the reduced solution (requires PRINT_PERF to be false for now) 831 * Initialize the file struct members and allocate space for file struct buffers. 832 * 833 * Note: this function is only called when we are using new format. Old POSIX 834 * format should skip this routine and call openfile() directly instead. 835 */ 836 int initphmpiiosub( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[],MPI_Comm my_local_comm) 837 { 838 // we init irank again in case query not called (e.g. syncIO write case) 839 840 MPI_Comm_rank(my_local_comm, &irank); 841 MPI_Comm_size(my_local_comm, &mysize); 842 843 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 844 845 double timer_start, timer_end; 846 startTimer(&timer_start); 847 848 char* imode = StringStripper( mode ); 849 850 // Note: if it's read, we presume query was called prior to init and 851 // MasterHeaderSize is already set to correct value from parsing header 852 // otherwise it's write then it needs some computation to be set 853 if ( cscompare( "read", imode ) ) { 854 // do nothing 855 } 856 else if( cscompare( "write", imode ) ) { 857 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 858 } 859 else { 860 printf("Error initphmpiio: can't recognize the mode %s", imode); 861 exit(1); 862 } 863 free ( imode ); 864 865 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 866 867 int i, j; 868 869 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 870 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 871 endTimer(&timer_end); 872 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 873 return MAX_PHASTA_FILES_EXCEEDED; 874 } 875 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 876 // { 877 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 878 // { 879 // PhastaIOActiveFiles[i] = NULL; 880 // } 881 // } 882 883 884 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 885 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 886 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 887 //if( mem_address & (pool_align -1) ) 888 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 889 890 i = PhastaIONextActiveIndex; 891 PhastaIONextActiveIndex++; 892 893 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 894 895 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 896 897 PhastaIOActiveFiles[i]->Wrong_Endian = false; 898 899 PhastaIOActiveFiles[i]->nFields = *nfields; 900 PhastaIOActiveFiles[i]->nPPF = *nppf; 901 PhastaIOActiveFiles[i]->nFiles = *nfiles; 902 MPI_Comm_rank(my_local_comm, &(PhastaIOActiveFiles[i]->myrank)); 903 MPI_Comm_size(my_local_comm, &(PhastaIOActiveFiles[i]->numprocs)); 904 905 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 906 MPI_Comm_split(my_local_comm, 907 color, 908 PhastaIOActiveFiles[i]->myrank, 909 &(PhastaIOActiveFiles[i]->local_comm)); 910 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 911 &(PhastaIOActiveFiles[i]->local_numprocs)); 912 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 913 &(PhastaIOActiveFiles[i]->local_myrank)); 914 PhastaIOActiveFiles[i]->nppp = 915 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 916 917 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 918 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 919 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 920 921 PhastaIOActiveFiles[i]->my_offset_table = 922 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 923 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 924 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 925 //if( mem_address & (pool_align -1) ) 926 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 927 928 PhastaIOActiveFiles[i]->my_read_table = 929 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 930 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 931 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 932 //if( mem_address & (pool_align -1) ) 933 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 934 935 for (j=0; j<*nfields; j++) 936 { 937 PhastaIOActiveFiles[i]->my_offset_table[j] = 938 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 939 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 940 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 941 //if( mem_address & (pool_align -1) ) 942 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 943 944 PhastaIOActiveFiles[i]->my_read_table[j] = 945 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 946 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 947 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 948 //if( mem_address & (pool_align -1) ) 949 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 950 } 951 *filehandle = i; 952 953 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 954 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 955 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 956 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 957 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 958 959 /* 960 PhastaIOActiveFiles[i]->master_header = 961 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 962 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 963 if( mem_address & (pool_align -1) ) 964 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 965 966 PhastaIOActiveFiles[i]->double_chunk = 967 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 968 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 969 if( mem_address & (pool_align -1) ) 970 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 971 972 PhastaIOActiveFiles[i]->int_chunk = 973 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 974 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 975 if( mem_address & (pool_align -1) ) 976 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 977 978 PhastaIOActiveFiles[i]->read_double_chunk = 979 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 980 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 981 if( mem_address & (pool_align -1) ) 982 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 983 984 PhastaIOActiveFiles[i]->read_int_chunk = 985 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 986 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 987 if( mem_address & (pool_align -1) ) 988 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 989 */ 990 991 // Time monitoring 992 endTimer(&timer_end); 993 printPerf("initphmpiiosub", timer_start, timer_end, 0, 0, ""); 994 995 phprintf_0("Info initphmpiiosub: quiting function"); 996 997 return i; 998 } 999 1000 1001 1002 /** open file for both POSIX and MPI-IO syncIO format. 1003 * 1004 * If it's old POSIX format, simply call posix fopen(). 1005 * 1006 * If it's MPI-IO foramt: 1007 * in "read" mode, it builds the header table that points to the offset of 1008 * fields for parts; 1009 * in "write" mode, it opens the file with MPI-IO open routine. 1010 */ 1011 void openfile(const char filename[], 1012 const char mode[], 1013 int* fileDescriptor ) 1014 { 1015 phprintf_0("Info: entering openfile"); 1016 1017 double timer_start, timer_end; 1018 startTimer(&timer_start); 1019 1020 if ( PhastaIONextActiveIndex == 0 ) 1021 { 1022 FILE* file=NULL ; 1023 *fileDescriptor = 0; 1024 char* fname = StringStripper( filename ); 1025 char* imode = StringStripper( mode ); 1026 1027 if ( cscompare( "read", imode ) ) file = fopen(fname, "rb" ); 1028 else if( cscompare( "write", imode ) ) file = fopen(fname, "wb" ); 1029 else if( cscompare( "append", imode ) ) file = fopen(fname, "ab" ); 1030 1031 if ( !file ){ 1032 fprintf(stderr,"Error openfile: unable to open file %s",fname ) ; 1033 } else { 1034 fileArray.push_back( file ); 1035 byte_order.push_back( false ); 1036 header_type.push_back( sizeof(int) ); 1037 *fileDescriptor = fileArray.size(); 1038 } 1039 free (fname); 1040 free (imode); 1041 } 1042 else // else it would be parallel I/O, opposed to posix io 1043 { 1044 char* fname = StringStripper( filename ); 1045 char* imode = StringStripper( mode ); 1046 int rc; 1047 int i = *fileDescriptor; 1048 checkFileDescriptor("openfile",&i); 1049 char* token; 1050 1051 if ( cscompare( "read", imode ) ) 1052 { 1053 // if (PhastaIOActiveFiles[i]->myrank == 0) 1054 // printf("\n **********\nRead open ... ... regular version\n"); 1055 1056 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1057 fname, 1058 MPI_MODE_RDONLY, 1059 MPI_INFO_NULL, 1060 &(PhastaIOActiveFiles[i]->file_handle) ); 1061 1062 if(rc) 1063 { 1064 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1065 printf("Error openfile: Unable to open file %s! File descriptor = %d\n",fname,*fileDescriptor); 1066 endTimer(&timer_end); 1067 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1068 return; 1069 } 1070 1071 MPI_Status read_tag_status; 1072 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1073 int j; 1074 int magic_number; 1075 1076 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1077 MPI_File_read_at( PhastaIOActiveFiles[i]->file_handle, 1078 0, 1079 PhastaIOActiveFiles[i]->master_header, 1080 MasterHeaderSize, 1081 MPI_CHAR, 1082 &read_tag_status ); 1083 } 1084 1085 MPI_Bcast( PhastaIOActiveFiles[i]->master_header, 1086 MasterHeaderSize, 1087 MPI_CHAR, 1088 0, 1089 PhastaIOActiveFiles[i]->local_comm ); 1090 1091 memcpy( read_out_tag, 1092 PhastaIOActiveFiles[i]->master_header, 1093 MAX_FIELDS_NAME_LENGTH-1 ); 1094 1095 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) 1096 { 1097 // Test endianess ... 1098 memcpy ( &magic_number, 1099 PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1100 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 1101 1102 if ( magic_number != ENDIAN_TEST_NUMBER ) 1103 { 1104 PhastaIOActiveFiles[i]->Wrong_Endian = true; 1105 } 1106 1107 memcpy( read_out_tag, 1108 PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH+1, // TODO: WHY +1??? 1109 MAX_FIELDS_NAME_LENGTH ); 1110 1111 // Read in # fields ... 1112 token = strtok ( read_out_tag, ":" ); 1113 token = strtok( NULL," ,;<>" ); 1114 PhastaIOActiveFiles[i]->nFields = atoi( token ); 1115 1116 unsigned long long **header_table; 1117 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1118 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1119 //mem_address = (long long )header_table; 1120 //if( mem_address & (pool_align -1) ) 1121 // header_table += pool_align - (mem_address & (pool_align -1)); 1122 1123 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1124 { 1125 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1126 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof(unsigned long long *)); 1127 //mem_address = (long long )header_table[j]; 1128 //if( mem_address & (pool_align -1) ) 1129 // header_table[j] += pool_align - (mem_address & (pool_align -1)); 1130 } 1131 1132 // Read in the offset table ... 1133 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1134 { 1135 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1136 memcpy( header_table[j], 1137 PhastaIOActiveFiles[i]->master_header + 1138 VERSION_INFO_HEADER_SIZE + 1139 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1140 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1141 } 1142 1143 MPI_Scatter( header_table[j], 1144 PhastaIOActiveFiles[i]->nppp, 1145 MPI_LONG_LONG_INT, 1146 PhastaIOActiveFiles[i]->my_read_table[j], 1147 PhastaIOActiveFiles[i]->nppp, 1148 MPI_LONG_LONG_INT, 1149 0, 1150 PhastaIOActiveFiles[i]->local_comm ); 1151 1152 // Swap byte order if endianess is different ... 1153 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) { 1154 SwapArrayByteOrder( PhastaIOActiveFiles[i]->my_read_table[j], 1155 sizeof(long long int), 1156 PhastaIOActiveFiles[i]->nppp ); 1157 } 1158 } 1159 1160 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1161 free ( header_table[j] ); 1162 } 1163 free (header_table); 1164 1165 } // end of if MPI_IO_TAG 1166 else //else not valid MPI file 1167 { 1168 *fileDescriptor = NOT_A_MPI_FILE; 1169 printf("Error openfile: The file %s you opened is not in syncIO new format, please check again! File descriptor = %d, MasterHeaderSize = %d, read_out_tag = %s\n",fname,*fileDescriptor,MasterHeaderSize,read_out_tag); 1170 //Printing MasterHeaderSize is useful to test a compiler bug on Intrepid BGP 1171 endTimer(&timer_end); 1172 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1173 return; 1174 } 1175 } // end of if "read" 1176 else if( cscompare( "write", imode ) ) 1177 { 1178 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1179 fname, 1180 MPI_MODE_WRONLY | MPI_MODE_CREATE, 1181 MPI_INFO_NULL, 1182 &(PhastaIOActiveFiles[i]->file_handle) ); 1183 if(rc) 1184 { 1185 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1186 return; 1187 } 1188 } // end of if "write" 1189 free (fname); 1190 free (imode); 1191 } // end of if FileIndex != 0 1192 1193 endTimer(&timer_end); 1194 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1195 } 1196 1197 /** close file for both POSIX and MPI-IO syncIO format. 1198 * 1199 * If it's old POSIX format, simply call posix fclose(). 1200 * 1201 * If it's MPI-IO foramt: 1202 * in "read" mode, it simply close file with MPI-IO close routine. 1203 * in "write" mode, rank 0 in each group will re-assemble the master header and 1204 * offset table and write to the beginning of file, then close the file. 1205 */ 1206 void closefile( int* fileDescriptor, 1207 const char mode[] ) 1208 { 1209 double timer_start, timer_end; 1210 startTimer(&timer_start); 1211 1212 int i = *fileDescriptor; 1213 checkFileDescriptor("closefile",&i); 1214 1215 if ( PhastaIONextActiveIndex == 0 ) { 1216 char* imode = StringStripper( mode ); 1217 1218 if( cscompare( "write", imode ) 1219 || cscompare( "append", imode ) ) { 1220 fflush( fileArray[ *fileDescriptor - 1 ] ); 1221 } 1222 1223 fclose( fileArray[ *fileDescriptor - 1 ] ); 1224 free (imode); 1225 } 1226 else { 1227 char* imode = StringStripper( mode ); 1228 1229 //write master header here: 1230 if ( cscompare( "write", imode ) ) { 1231 // if ( PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields < 2*ONE_MEGABYTE/8 ) //SHOULD BE CHECKED 1232 // MasterHeaderSize = 4*ONE_MEGABYTE; 1233 // else 1234 // MasterHeaderSize = 4*ONE_MEGABYTE + PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields * 8 - 2*ONE_MEGABYTE; 1235 1236 MasterHeaderSize = computeMHSize( PhastaIOActiveFiles[i]->nFields, PhastaIOActiveFiles[i]->nPPF, LATEST_WRITE_VERSION); 1237 phprintf_0("Info closefile: myrank = %d, MasterHeaderSize = %d\n", PhastaIOActiveFiles[i]->myrank, MasterHeaderSize); 1238 1239 MPI_Status write_header_status; 1240 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1241 char version[MAX_FIELDS_NAME_LENGTH/4]; 1242 char mhsize[MAX_FIELDS_NAME_LENGTH/4]; 1243 int magic_number = ENDIAN_TEST_NUMBER; 1244 1245 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) 1246 { 1247 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1248 sprintf(mpi_tag, "MPI_IO_Tag : "); 1249 memcpy(PhastaIOActiveFiles[i]->master_header, 1250 mpi_tag, 1251 MAX_FIELDS_NAME_LENGTH); 1252 1253 bzero((void*)version,MAX_FIELDS_NAME_LENGTH/4); 1254 // this version is "1", print version in ASCII 1255 sprintf(version, "version : %d",1); 1256 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/2, 1257 version, 1258 MAX_FIELDS_NAME_LENGTH/4); 1259 1260 // master header size is computed using the formula above 1261 bzero((void*)mhsize,MAX_FIELDS_NAME_LENGTH/4); 1262 sprintf(mhsize, "mhsize : "); 1263 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/4*3, 1264 mhsize, 1265 MAX_FIELDS_NAME_LENGTH/4); 1266 1267 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1268 sprintf(mpi_tag, 1269 "\nnFields : %d\n", 1270 PhastaIOActiveFiles[i]->nFields); 1271 memcpy(PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH, 1272 mpi_tag, 1273 MAX_FIELDS_NAME_LENGTH); 1274 1275 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1276 sprintf(mpi_tag, "\nnPPF : %d\n", PhastaIOActiveFiles[i]->nPPF); 1277 memcpy( PhastaIOActiveFiles[i]->master_header+ 1278 PhastaIOActiveFiles[i]->nFields * 1279 MAX_FIELDS_NAME_LENGTH + 1280 MAX_FIELDS_NAME_LENGTH * 2, 1281 mpi_tag, 1282 MAX_FIELDS_NAME_LENGTH); 1283 1284 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1285 &magic_number, 1286 sizeof(int)); 1287 1288 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("mhsize : ") -1 + MAX_FIELDS_NAME_LENGTH/4*3, 1289 &MasterHeaderSize, 1290 sizeof(int)); 1291 } 1292 1293 int j = 0; 1294 unsigned long long **header_table; 1295 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1296 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1297 //mem_address = (long long )header_table; 1298 //if( mem_address & (pool_align -1) ) 1299 // header_table += pool_align - (mem_address & (pool_align -1)); 1300 1301 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1302 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1303 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof (unsigned long long *)); 1304 //mem_address = (long long )header_table[j]; 1305 //if( mem_address & (pool_align -1) ) 1306 // header_table[j] += pool_align - (mem_address & (pool_align - 1)); 1307 } 1308 1309 //if( irank == 0 ) printf("gonna mpi_gather, myrank = %d\n", irank); 1310 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1311 MPI_Gather( PhastaIOActiveFiles[i]->my_offset_table[j], 1312 PhastaIOActiveFiles[i]->nppp, 1313 MPI_LONG_LONG_INT, 1314 header_table[j], 1315 PhastaIOActiveFiles[i]->nppp, 1316 MPI_LONG_LONG_INT, 1317 0, 1318 PhastaIOActiveFiles[i]->local_comm ); 1319 } 1320 1321 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1322 1323 //if( irank == 0 ) printf("gonna memcpy for every procs, myrank = %d\n", irank); 1324 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1325 memcpy ( PhastaIOActiveFiles[i]->master_header + 1326 VERSION_INFO_HEADER_SIZE + 1327 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1328 header_table[j], 1329 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1330 } 1331 1332 //if( irank == 0 ) printf("gonna file_write_at(), myrank = %d\n", irank); 1333 MPI_File_write_at( PhastaIOActiveFiles[i]->file_handle, 1334 0, 1335 PhastaIOActiveFiles[i]->master_header, 1336 MasterHeaderSize, 1337 MPI_CHAR, 1338 &write_header_status ); 1339 } 1340 1341 ////free(PhastaIOActiveFiles[i]->master_header); 1342 1343 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1344 free ( header_table[j] ); 1345 } 1346 free (header_table); 1347 } 1348 1349 //if( irank == 0 ) printf("gonna file_close(), myrank = %d\n", irank); 1350 MPI_File_close( &( PhastaIOActiveFiles[i]->file_handle ) ); 1351 free ( imode ); 1352 } 1353 1354 endTimer(&timer_end); 1355 printPerf("closefile_", timer_start, timer_end, 0, 0, ""); 1356 } 1357 1358 void readheader( int* fileDescriptor, 1359 const char keyphrase[], 1360 void* valueArray, 1361 int* nItems, 1362 const char datatype[], 1363 const char iotype[] ) 1364 { 1365 double timer_start, timer_end; 1366 1367 startTimer(&timer_start); 1368 1369 int i = *fileDescriptor; 1370 checkFileDescriptor("readheader",&i); 1371 1372 if ( PhastaIONextActiveIndex == 0 ) { 1373 int filePtr = *fileDescriptor - 1; 1374 FILE* fileObject; 1375 int* valueListInt; 1376 1377 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1378 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1379 fprintf(stderr,"openfile_ function has to be called before \n") ; 1380 fprintf(stderr,"acessing the file\n ") ; 1381 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1382 endTimer(&timer_end); 1383 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1384 return; 1385 } 1386 1387 if( LastHeaderKey.count(filePtr) ) 1388 free(LastHeaderKey[filePtr]); 1389 const int l = strlen(keyphrase)+1; 1390 LastHeaderKey[filePtr] = (char*) malloc(l*sizeof(char)); 1391 strcpy(LastHeaderKey[filePtr], keyphrase); 1392 LastHeaderNotFound = false; 1393 1394 fileObject = fileArray[ filePtr ] ; 1395 Wrong_Endian = byte_order[ filePtr ]; 1396 1397 isBinary( iotype ); 1398 typeSize( datatype ); //redundant call, just avoid a compiler warning. 1399 1400 // right now we are making the assumption that we will only write integers 1401 // on the header line. 1402 1403 valueListInt = static_cast< int* >( valueArray ); 1404 int ierr = readHeader( fileObject , 1405 keyphrase, 1406 valueListInt, 1407 *nItems ) ; 1408 1409 byte_order[ filePtr ] = Wrong_Endian ; 1410 1411 if ( ierr ) LastHeaderNotFound = true; 1412 1413 //return ; // don't return, go to the end to print perf 1414 } 1415 else { 1416 unsigned int skip_size; 1417 int* valueListInt; 1418 valueListInt = static_cast <int*>(valueArray); 1419 char* token = NULL; 1420 bool FOUND = false ; 1421 isBinary( iotype ); 1422 1423 MPI_Status read_offset_status; 1424 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1425 memset(read_out_tag, '\0', MAX_FIELDS_NAME_LENGTH); 1426 char readouttag[MAX_FIELDS_NUMBER][MAX_FIELDS_NAME_LENGTH]; 1427 int j; 1428 1429 int string_length = strlen( keyphrase ); 1430 char* buffer = (char*) malloc ( string_length+1 ); 1431 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1432 //mem_address = (long long )buffer; 1433 //if( mem_address & (pool_align -1) ) 1434 // buffer += pool_align - (mem_address & (pool_align -1)); 1435 1436 strcpy ( buffer, keyphrase ); 1437 buffer[ string_length ] = '\0'; 1438 1439 char* st2 = strtok ( buffer, "@" ); 1440 st2 = strtok (NULL, "@"); 1441 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1442 if ( char* p = strpbrk(buffer, "@") ) 1443 *p = '\0'; 1444 1445 // Check if the user has input the right GPid 1446 if ( ( PhastaIOActiveFiles[i]->GPid <= 1447 PhastaIOActiveFiles[i]->myrank * 1448 PhastaIOActiveFiles[i]->nppp )|| 1449 ( PhastaIOActiveFiles[i]->GPid > 1450 ( PhastaIOActiveFiles[i]->myrank + 1 ) * 1451 PhastaIOActiveFiles[i]->nppp ) ) 1452 { 1453 *fileDescriptor = NOT_A_MPI_FILE; 1454 printf("Error readheader: The file is not in syncIO new format, please check! myrank = %d, GPid = %d, nppp = %d, keyphrase = %s\n", PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->GPid, PhastaIOActiveFiles[i]->nppp, keyphrase); 1455 // It is possible atoi could not generate a clear integer from st2 because of additional garbage character in keyphrase 1456 endTimer(&timer_end); 1457 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1458 return; 1459 } 1460 1461 // Find the field we want ... 1462 //for ( j = 0; j<MAX_FIELDS_NUMBER; j++ ) 1463 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1464 { 1465 memcpy( readouttag[j], 1466 PhastaIOActiveFiles[i]->master_header + j*MAX_FIELDS_NAME_LENGTH+MAX_FIELDS_NAME_LENGTH*2+1, 1467 MAX_FIELDS_NAME_LENGTH-1 ); 1468 } 1469 1470 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1471 { 1472 token = strtok ( readouttag[j], ":" ); 1473 1474 //if ( cscompare( buffer, token ) ) 1475 if ( cscompare( token , buffer ) && cscompare( buffer, token ) ) 1476 // This double comparison is required for the field "number of nodes" and all the other fields that start with "number of nodes" (i.g. number of nodes in the mesh"). 1477 // Would be safer to rename "number of nodes" by "number of nodes in the part" so that the name are completely unique. But much more work to do that (Nspre, phParAdapt, etc). 1478 // Since the field name are unique in SyncIO (as it includes part ID), this should be safe and there should be no issue with the "?" trailing character. 1479 { 1480 PhastaIOActiveFiles[i]->read_field_count = j; 1481 FOUND = true; 1482 //printf("buffer: %s | token: %s | j: %d\n",buffer,token,j); 1483 break; 1484 } 1485 } 1486 free(buffer); 1487 1488 if (!FOUND) 1489 { 1490 //if(irank==0) printf("Warning readheader: Not found %s \n",keyphrase); //PhastaIOActiveFiles[i]->myrank is certainly initialized here. 1491 if(PhastaIOActiveFiles[i]->myrank == 0) printf("WARNING readheader: Not found %s\n",keyphrase); 1492 endTimer(&timer_end); 1493 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1494 return; 1495 } 1496 1497 // Find the part we want ... 1498 PhastaIOActiveFiles[i]->read_part_count = PhastaIOActiveFiles[i]->GPid - 1499 PhastaIOActiveFiles[i]->myrank * PhastaIOActiveFiles[i]->nppp - 1; 1500 1501 PhastaIOActiveFiles[i]->my_offset = 1502 PhastaIOActiveFiles[i]->my_read_table[PhastaIOActiveFiles[i]->read_field_count][PhastaIOActiveFiles[i]->read_part_count]; 1503 1504 // printf("****Rank %d offset is %d\n",PhastaIOActiveFiles[i]->myrank,PhastaIOActiveFiles[i]->my_offset); 1505 1506 // Read each datablock header here ... 1507 1508 MPI_File_read_at_all( PhastaIOActiveFiles[i]->file_handle, 1509 PhastaIOActiveFiles[i]->my_offset+1, 1510 read_out_tag, 1511 MAX_FIELDS_NAME_LENGTH-1, 1512 MPI_CHAR, 1513 &read_offset_status ); 1514 token = strtok ( read_out_tag, ":" ); 1515 1516 // printf("&&&&Rank %d read_out_tag is %s\n",PhastaIOActiveFiles[i]->myrank,read_out_tag); 1517 1518 if( cscompare( keyphrase , token ) ) //No need to compare also token with keyphrase like above. We should already have the right one. Otherwise there is a problem. 1519 { 1520 FOUND = true ; 1521 token = strtok( NULL, " ,;<>" ); 1522 skip_size = atoi( token ); 1523 for( j=0; j < *nItems && ( token = strtok( NULL," ,;<>") ); j++ ) 1524 valueListInt[j] = atoi( token ); 1525 1526 if ( j < *nItems ) 1527 { 1528 fprintf( stderr, "Expected # of ints not found for: %s\n", keyphrase ); 1529 } 1530 } 1531 else { 1532 //if(irank==0) 1533 if(PhastaIOActiveFiles[i]->myrank == 0) 1534 // If we enter this if, there is a problem with the name of some fields 1535 { 1536 printf("Error readheader: Unexpected mismatch between keyphrase = %s and token = %s\n",keyphrase,token); 1537 } 1538 } 1539 } 1540 1541 endTimer(&timer_end); 1542 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1543 1544 } 1545 1546 void readdatablock( int* fileDescriptor, 1547 const char keyphrase[], 1548 void* valueArray, 1549 int* nItems, 1550 const char datatype[], 1551 const char iotype[] ) 1552 { 1553 //if(irank == 0) printf("entering readdatablock()\n"); 1554 unsigned long long data_size = 0; 1555 double timer_start, timer_end; 1556 startTimer(&timer_start); 1557 1558 int i = *fileDescriptor; 1559 checkFileDescriptor("readdatablock",&i); 1560 1561 if ( PhastaIONextActiveIndex == 0 ) { 1562 int filePtr = *fileDescriptor - 1; 1563 FILE* fileObject; 1564 char junk; 1565 1566 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1567 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1568 fprintf(stderr,"openfile_ function has to be called before\n") ; 1569 fprintf(stderr,"acessing the file\n ") ; 1570 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1571 endTimer(&timer_end); 1572 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1573 return; 1574 } 1575 1576 // error check.. 1577 // since we require that a consistant header always preceed the data block 1578 // let us check to see that it is actually the case. 1579 1580 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1581 fprintf(stderr, "Header not consistant with data block\n"); 1582 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1583 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1584 fprintf(stderr, "Please recheck read sequence \n"); 1585 if( Strict_Error ) { 1586 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1587 endTimer(&timer_end); 1588 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1589 return; 1590 } 1591 } 1592 1593 if ( LastHeaderNotFound ) { 1594 endTimer(&timer_end); 1595 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1596 return; 1597 } 1598 fileObject = fileArray[ filePtr ]; 1599 Wrong_Endian = byte_order[ filePtr ]; 1600 1601 size_t type_size = typeSize( datatype ); 1602 int nUnits = *nItems; 1603 isBinary( iotype ); 1604 1605 free(LastHeaderKey[filePtr]); 1606 LastHeaderKey.erase(filePtr); 1607 1608 if ( binary_format ) { 1609 fread( valueArray, type_size, nUnits, fileObject ); 1610 fread( &junk, sizeof(char), 1 , fileObject ); 1611 if ( Wrong_Endian ) SwapArrayByteOrder( valueArray, type_size, nUnits ); 1612 } else { 1613 1614 char* ts1 = StringStripper( datatype ); 1615 if ( cscompare( "integer", ts1 ) ) { 1616 for( int n=0; n < nUnits ; n++ ) 1617 fscanf(fileObject, "%d\n",(int*)((int*)valueArray+n) ); 1618 } else if ( cscompare( "double", ts1 ) ) { 1619 for( int n=0; n < nUnits ; n++ ) 1620 fscanf(fileObject, "%lf\n",(double*)((double*)valueArray+n) ); 1621 } 1622 free (ts1); 1623 } 1624 1625 //return; 1626 } 1627 else { 1628 // printf("read data block\n"); 1629 MPI_Status read_data_status; 1630 size_t type_size = typeSize( datatype ); 1631 int nUnits = *nItems; 1632 isBinary( iotype ); 1633 1634 // read datablock then 1635 //MR CHANGE 1636 // if ( cscompare ( datatype, "double")) 1637 char* ts2 = StringStripper( datatype ); 1638 if ( cscompare ( "double" , ts2)) 1639 //MR CHANGE END 1640 { 1641 1642 MPI_File_read_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 1643 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1644 valueArray, 1645 nUnits, 1646 MPI_DOUBLE ); 1647 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1648 valueArray, 1649 &read_data_status ); 1650 data_size=8*nUnits; 1651 1652 } 1653 //MR CHANGE 1654 // else if ( cscompare ( datatype, "integer")) 1655 else if ( cscompare ( "integer" , ts2)) 1656 //MR CHANGE END 1657 { 1658 MPI_File_read_at_all_begin(PhastaIOActiveFiles[i]->file_handle, 1659 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1660 valueArray, 1661 nUnits, 1662 MPI_INT ); 1663 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1664 valueArray, 1665 &read_data_status ); 1666 data_size=4*nUnits; 1667 } 1668 else 1669 { 1670 *fileDescriptor = DATA_TYPE_ILLEGAL; 1671 printf("readdatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 1672 endTimer(&timer_end); 1673 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1674 return; 1675 } 1676 free(ts2); 1677 1678 1679 // printf("%d Read finishe\n",PhastaIOActiveFiles[i]->myrank); 1680 1681 // Swap data byte order if endianess is different ... 1682 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) 1683 { 1684 SwapArrayByteOrder( valueArray, type_size, nUnits ); 1685 } 1686 } 1687 1688 endTimer(&timer_end); 1689 char extra_msg[1024]; 1690 memset(extra_msg, '\0', 1024); 1691 char* key = StringStripper(keyphrase); 1692 sprintf(extra_msg, " field is %s ", key); 1693 printPerf("readdatablock", timer_start, timer_end, data_size, 1, extra_msg); 1694 free(key); 1695 1696 } 1697 1698 void writeheader( const int* fileDescriptor, 1699 const char keyphrase[], 1700 const void* valueArray, 1701 const int* nItems, 1702 const int* ndataItems, 1703 const char datatype[], 1704 const char iotype[]) 1705 { 1706 1707 //if(irank == 0) printf("entering writeheader()\n"); 1708 1709 double timer_start, timer_end; 1710 startTimer(&timer_start); 1711 1712 int i = *fileDescriptor; 1713 checkFileDescriptor("writeheader",&i); 1714 1715 if ( PhastaIONextActiveIndex == 0 ) { 1716 int filePtr = *fileDescriptor - 1; 1717 FILE* fileObject; 1718 1719 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1720 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1721 fprintf(stderr,"openfile_ function has to be called before \n") ; 1722 fprintf(stderr,"acessing the file\n ") ; 1723 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1724 endTimer(&timer_end); 1725 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1726 return; 1727 } 1728 1729 if( LastHeaderKey.count(filePtr) ) 1730 free(LastHeaderKey[filePtr]); 1731 const int l = strlen(keyphrase)+1; 1732 LastHeaderKey[filePtr] = (char*) malloc(l*sizeof(char)); 1733 strcpy(LastHeaderKey[filePtr], keyphrase); 1734 DataSize = *ndataItems; 1735 fileObject = fileArray[ filePtr ] ; 1736 size_t type_size = typeSize( datatype ); 1737 isBinary( iotype ); 1738 header_type[ filePtr ] = type_size; 1739 1740 int _newline = ( *ndataItems > 0 ) ? sizeof( char ) : 0; 1741 int size_of_nextblock = 1742 ( binary_format ) ? type_size*( *ndataItems )+ _newline : *ndataItems ; 1743 1744 fprintf( fileObject, "%s : < %d > ", keyphrase, size_of_nextblock ); 1745 for( int i = 0; i < *nItems; i++ ) 1746 fprintf(fileObject, "%d ", *((int*)((int*)valueArray+i))); 1747 fprintf(fileObject, "\n"); 1748 1749 //return ; 1750 } 1751 else { // else it's parallel I/O 1752 DataSize = *ndataItems; 1753 size_t type_size = typeSize( datatype ); 1754 isBinary( iotype ); 1755 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1756 1757 int string_length = strlen( keyphrase ); 1758 char* buffer = (char*) malloc ( string_length+1 ); 1759 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1760 //mem_address = (long long )buffer; 1761 //if( mem_address & (pool_align -1) ) 1762 // buffer += pool_align - (mem_address & (pool_align -1)); 1763 1764 strcpy ( buffer, keyphrase); 1765 buffer[ string_length ] = '\0'; 1766 1767 char* st2 = strtok ( buffer, "@" ); 1768 st2 = strtok (NULL, "@"); 1769 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1770 1771 if ( char* p = strpbrk(buffer, "@") ) 1772 *p = '\0'; 1773 1774 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1775 sprintf(mpi_tag, "\n%s : %d\n", buffer, PhastaIOActiveFiles[i]->field_count); 1776 unsigned long long offset_value; 1777 1778 int temp = *ndataItems; 1779 unsigned long long number_of_items = (unsigned long long)temp; 1780 MPI_Barrier(PhastaIOActiveFiles[i]->local_comm); 1781 1782 MPI_Scan( &number_of_items, 1783 &offset_value, 1784 1, 1785 MPI_LONG_LONG_INT, 1786 MPI_SUM, 1787 PhastaIOActiveFiles[i]->local_comm ); 1788 1789 offset_value = (offset_value - number_of_items) * type_size; 1790 1791 offset_value += PhastaIOActiveFiles[i]->local_myrank * 1792 DB_HEADER_SIZE + 1793 PhastaIOActiveFiles[i]->next_start_address; 1794 // This offset is the starting address of each datablock header... 1795 PhastaIOActiveFiles[i]->my_offset = offset_value; 1796 1797 // Write in my offset table ... 1798 PhastaIOActiveFiles[i]->my_offset_table[PhastaIOActiveFiles[i]->field_count][PhastaIOActiveFiles[i]->part_count] = 1799 PhastaIOActiveFiles[i]->my_offset; 1800 1801 // Update the next-start-address ... 1802 PhastaIOActiveFiles[i]->next_start_address = offset_value + 1803 number_of_items * type_size + 1804 DB_HEADER_SIZE; 1805 MPI_Bcast( &(PhastaIOActiveFiles[i]->next_start_address), 1806 1, 1807 MPI_LONG_LONG_INT, 1808 PhastaIOActiveFiles[i]->local_numprocs-1, 1809 PhastaIOActiveFiles[i]->local_comm ); 1810 1811 // Prepare datablock header ... 1812 int _newline = (*ndataItems>0)?sizeof(char):0; 1813 unsigned int size_of_nextblock = type_size * (*ndataItems) + _newline; 1814 1815 //char datablock_header[255]; 1816 //bzero((void*)datablock_header,255); 1817 char datablock_header[DB_HEADER_SIZE]; 1818 bzero((void*)datablock_header,DB_HEADER_SIZE); 1819 1820 PhastaIOActiveFiles[i]->GPid = PhastaIOActiveFiles[i]->nppp*PhastaIOActiveFiles[i]->myrank+PhastaIOActiveFiles[i]->part_count; 1821 sprintf( datablock_header, 1822 "\n%s : < %u >", 1823 keyphrase, 1824 size_of_nextblock ); 1825 1826 for ( int j = 0; j < *nItems; j++ ) 1827 { 1828 sprintf( datablock_header, 1829 "%s %d ", 1830 datablock_header, 1831 *((int*)((int*)valueArray+j))); 1832 } 1833 sprintf( datablock_header, 1834 "%s\n ", 1835 datablock_header ); 1836 1837 // Write datablock header ... 1838 //MR CHANGE 1839 // if ( cscompare(datatype,"double") ) 1840 char* ts1 = StringStripper( datatype ); 1841 if ( cscompare("double",ts1) ) 1842 //MR CHANGE END 1843 { 1844 free ( PhastaIOActiveFiles[i]->double_chunk ); 1845 PhastaIOActiveFiles[i]->double_chunk = ( double * )malloc( (sizeof( double )*number_of_items+ DB_HEADER_SIZE)); 1846 //PhastaIOActiveFiles[i]->double_chunk = ( double * ) malloc( sizeof( double )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1847 //mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 1848 //if( mem_address & (pool_align -1) ) 1849 // PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 1850 1851 double * aa = ( double * )datablock_header; 1852 memcpy(PhastaIOActiveFiles[i]->double_chunk, aa, DB_HEADER_SIZE); 1853 } 1854 //MR CHANGE 1855 // if ( cscompare(datatype,"integer") ) 1856 else if ( cscompare("integer",ts1) ) 1857 //MR CHANGE END 1858 { 1859 free ( PhastaIOActiveFiles[i]->int_chunk ); 1860 PhastaIOActiveFiles[i]->int_chunk = ( int * )malloc( (sizeof( int )*number_of_items+ DB_HEADER_SIZE)); 1861 //PhastaIOActiveFiles[i]->int_chunk = ( int * ) malloc( sizeof( int )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1862 //mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 1863 //if( mem_address & (pool_align -1) ) 1864 // PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & ( pool_align -1)); 1865 1866 int * aa = ( int * )datablock_header; 1867 memcpy(PhastaIOActiveFiles[i]->int_chunk, aa, DB_HEADER_SIZE); 1868 } 1869 else { 1870 // *fileDescriptor = DATA_TYPE_ILLEGAL; 1871 printf("writeheader - DATA_TYPE_ILLEGAL - %s\n",datatype); 1872 endTimer(&timer_end); 1873 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1874 return; 1875 } 1876 free(ts1); 1877 1878 PhastaIOActiveFiles[i]->part_count++; 1879 if ( PhastaIOActiveFiles[i]->part_count == PhastaIOActiveFiles[i]->nppp ) { 1880 //A new field will be written 1881 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1882 memcpy( PhastaIOActiveFiles[i]->master_header + 1883 PhastaIOActiveFiles[i]->field_count * 1884 MAX_FIELDS_NAME_LENGTH + 1885 MAX_FIELDS_NAME_LENGTH * 2, 1886 mpi_tag, 1887 MAX_FIELDS_NAME_LENGTH-1); 1888 } 1889 PhastaIOActiveFiles[i]->field_count++; 1890 PhastaIOActiveFiles[i]->part_count=0; 1891 } 1892 free(buffer); 1893 } 1894 1895 endTimer(&timer_end); 1896 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1897 } 1898 1899 void writedatablock( const int* fileDescriptor, 1900 const char keyphrase[], 1901 const void* valueArray, 1902 const int* nItems, 1903 const char datatype[], 1904 const char iotype[] ) 1905 { 1906 //if(irank == 0) printf("entering writedatablock()\n"); 1907 1908 unsigned long long data_size = 0; 1909 double timer_start, timer_end; 1910 startTimer(&timer_start); 1911 1912 int i = *fileDescriptor; 1913 checkFileDescriptor("writedatablock",&i); 1914 1915 if ( PhastaIONextActiveIndex == 0 ) { 1916 int filePtr = *fileDescriptor - 1; 1917 1918 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1919 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1920 fprintf(stderr,"openfile_ function has to be called before \n") ; 1921 fprintf(stderr,"acessing the file\n ") ; 1922 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1923 endTimer(&timer_end); 1924 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1925 return; 1926 } 1927 // since we require that a consistant header always preceed the data block 1928 // let us check to see that it is actually the case. 1929 1930 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1931 fprintf(stderr, "Header not consistant with data block\n"); 1932 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1933 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1934 fprintf(stderr, "Please recheck write sequence \n"); 1935 if( Strict_Error ) { 1936 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1937 endTimer(&timer_end); 1938 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1939 return; 1940 } 1941 } 1942 1943 FILE* fileObject = fileArray[ filePtr ] ; 1944 size_t type_size=typeSize( datatype ); 1945 isBinary( iotype ); 1946 1947 free(LastHeaderKey[filePtr]); 1948 LastHeaderKey.erase(filePtr); 1949 1950 if ( header_type[filePtr] != (int)type_size ) { 1951 fprintf(stderr,"header and datablock differ on typeof data in the block for\n"); 1952 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1953 if( Strict_Error ) { 1954 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1955 endTimer(&timer_end); 1956 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1957 return; 1958 } 1959 } 1960 1961 int nUnits = *nItems; 1962 1963 if ( nUnits != DataSize ) { 1964 fprintf(stderr,"header and datablock differ on number of data items for\n"); 1965 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1966 if( Strict_Error ) { 1967 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1968 endTimer(&timer_end); 1969 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1970 return; 1971 } 1972 } 1973 1974 if ( binary_format ) { 1975 1976 fwrite( valueArray, type_size, nUnits, fileObject ); 1977 fprintf( fileObject,"\n"); 1978 1979 } else { 1980 1981 char* ts1 = StringStripper( datatype ); 1982 if ( cscompare( "integer", ts1 ) ) { 1983 for( int n=0; n < nUnits ; n++ ) 1984 fprintf(fileObject,"%d\n",*((int*)((int*)valueArray+n))); 1985 } else if ( cscompare( "double", ts1 ) ) { 1986 for( int n=0; n < nUnits ; n++ ) 1987 fprintf(fileObject,"%lf\n",*((double*)((double*)valueArray+n))); 1988 } 1989 free (ts1); 1990 } 1991 //return ; 1992 } 1993 else { // syncIO case 1994 MPI_Status write_data_status; 1995 isBinary( iotype ); 1996 int nUnits = *nItems; 1997 1998 //MR CHANGE 1999 // if ( cscompare(datatype,"double") ) 2000 char* ts1 = StringStripper( datatype ); 2001 if ( cscompare("double",ts1) ) 2002 //MR CHANGE END 2003 { 2004 memcpy((PhastaIOActiveFiles[i]->double_chunk+DB_HEADER_SIZE/sizeof(double)), valueArray, nUnits*sizeof(double)); 2005 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 2006 PhastaIOActiveFiles[i]->my_offset, 2007 PhastaIOActiveFiles[i]->double_chunk, 2008 //BLOCK_SIZE/sizeof(double), 2009 nUnits+DB_HEADER_SIZE/sizeof(double), 2010 MPI_DOUBLE ); 2011 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 2012 PhastaIOActiveFiles[i]->double_chunk, 2013 &write_data_status ); 2014 data_size=8*nUnits; 2015 } 2016 //MR CHANGE 2017 // else if ( cscompare ( datatype, "integer")) 2018 else if ( cscompare("integer",ts1) ) 2019 //MR CHANGE END 2020 { 2021 memcpy((PhastaIOActiveFiles[i]->int_chunk+DB_HEADER_SIZE/sizeof(int)), valueArray, nUnits*sizeof(int)); 2022 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 2023 PhastaIOActiveFiles[i]->my_offset, 2024 PhastaIOActiveFiles[i]->int_chunk, 2025 nUnits+DB_HEADER_SIZE/sizeof(int), 2026 MPI_INT ); 2027 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 2028 PhastaIOActiveFiles[i]->int_chunk, 2029 &write_data_status ); 2030 data_size=4*nUnits; 2031 } 2032 else { 2033 printf("Error: writedatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 2034 endTimer(&timer_end); 2035 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 2036 return; 2037 } 2038 free(ts1); 2039 } 2040 2041 endTimer(&timer_end); 2042 char extra_msg[1024]; 2043 memset(extra_msg, '\0', 1024); 2044 char* key = StringStripper(keyphrase); 2045 sprintf(extra_msg, " field is %s ", key); 2046 printPerf("writedatablock", timer_start, timer_end, data_size, 1, extra_msg); 2047 free(key); 2048 2049 } 2050 2051 void 2052 SwapArrayByteOrder( void* array, 2053 int nbytes, 2054 int nItems ) 2055 { 2056 /* This swaps the byte order for the array of nItems each 2057 of size nbytes , This will be called only locally */ 2058 int i,j; 2059 unsigned char* ucDst = (unsigned char*)array; 2060 2061 for(i=0; i < nItems; i++) { 2062 for(j=0; j < (nbytes/2); j++) 2063 std::swap( ucDst[j] , ucDst[(nbytes - 1) - j] ); 2064 ucDst += nbytes; 2065 } 2066 } 2067 2068 void 2069 writestring( int* fileDescriptor, 2070 const char inString[] ) 2071 { 2072 2073 int filePtr = *fileDescriptor - 1; 2074 FILE* fileObject = fileArray[filePtr] ; 2075 fprintf(fileObject,"%s",inString ); 2076 return; 2077 } 2078 2079 void 2080 Gather_Headers( int* fileDescriptor, 2081 vector< string >& headers ) 2082 { 2083 2084 FILE* fileObject; 2085 char Line[1024]; 2086 2087 fileObject = fileArray[ (*fileDescriptor)-1 ]; 2088 2089 while( !feof(fileObject) ) { 2090 fgets( Line, 1024, fileObject); 2091 if ( Line[0] == '#' ) { 2092 headers.push_back( Line ); 2093 } else { 2094 break; 2095 } 2096 } 2097 rewind( fileObject ); 2098 clearerr( fileObject ); 2099 } 2100 void 2101 isWrong( void ) { (Wrong_Endian) ? fprintf(stdout,"YES\n"): fprintf(stdout,"NO\n") ; } 2102 2103 void 2104 togglestrictmode( void ) { Strict_Error = !Strict_Error; } 2105 2106 int 2107 isLittleEndian( void ) 2108 { 2109 // this function returns a 1 if the current running architecture is 2110 // LittleEndian Byte Ordered, else it returns a zero 2111 2112 union { 2113 long a; 2114 char c[sizeof( long )]; 2115 } endianUnion; 2116 2117 endianUnion.a = 1 ; 2118 2119 if ( endianUnion.c[sizeof(long)-1] != 1 ) return 1 ; 2120 else return 0; 2121 } 2122 2123 namespace PHASTA { 2124 const char* const PhastaIO_traits<int>::type_string = "integer"; 2125 const char* const PhastaIO_traits<double>::type_string = "double"; 2126 } 2127