1 /* 2 * 3 * This is the SyncIO library that uses MPI-IO collective functions to 4 * implement a flexible I/O checkpoint solution for a large number of 5 * processors. 6 * 7 * Previous developer: Ning Liu (liun2@cs.rpi.edu) 8 * Jing Fu (fuj@cs.rpi.edu) 9 * Current developers: Michel Rasquin (Michel.Rasquin@colorado.edu), 10 * Ben Matthews (benjamin.a.matthews@colorado.edu) 11 * 12 */ 13 14 #include <map> 15 #include <vector> 16 #include <string> 17 #include <string.h> 18 #include <ctype.h> 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <math.h> 22 #include <sstream> 23 #include "phastaIO.h" 24 #include "mpi.h" 25 #include "phiotmrc.h" 26 27 #define VERSION_INFO_HEADER_SIZE 8192 28 #define DB_HEADER_SIZE 1024 29 #define ONE_MEGABYTE 1048576 30 #define TWO_MEGABYTE 2097152 31 #define ENDIAN_TEST_NUMBER 12180 // Troy's Zip Code!! 32 #define MAX_PHASTA_FILES 64 33 #define MAX_PHASTA_FILE_NAME_LENGTH 1024 34 #define MAX_FIELDS_NUMBER ((VERSION_INFO_HEADER_SIZE/MAX_FIELDS_NAME_LENGTH)-4) // The meta data include - MPI_IO_Tag, nFields, nFields*names of the fields, nppf 35 // -3 for MPI_IO_Tag, nFields and nppf, -4 for extra security (former nFiles) 36 #define MAX_FIELDS_NAME_LENGTH 128 37 #define DefaultMHSize (4*ONE_MEGABYTE) 38 //#define DefaultMHSize (8350) //For test 39 #define LATEST_WRITE_VERSION 1 40 #define inv1024sq 953.674316406e-9 // = 1/1024/1024 41 int MasterHeaderSize = -1; 42 43 bool PRINT_PERF = false; // default print no perf results 44 int irank = -1; // global rank, should never be manually manipulated 45 int mysize = -1; 46 47 // Static variables are bad but used here to store the subcommunicator and associated variables 48 // Prevent MPI_Comm_split to be called more than once, especially on BGQ with the V1R2M1 driver (leak detected in MPI_Comm_split - IBM working on it) 49 static int s_assign_local_comm = 0; 50 static MPI_Comm s_local_comm; 51 static int s_local_size = -1; 52 static int s_local_rank = -1; 53 54 //unsigned long long pool_align = 8; 55 //unsigned long long mem_address; 56 57 // the following defines are for debug printf 58 #define PHASTAIO_PREFIX "phastaIO debug: " 59 #define PHASTAIO_DEBUG 0 //default to not print any debugging info 60 61 #if PHASTAIO_DEBUG 62 #define phprintf( s, arg...) printf(PHASTAIO_PREFIX s "\n", ##arg) 63 #define phprintf_0( s, arg...) do{ \ 64 MPI_Comm_rank(MPI_COMM_WORLD, &irank); \ 65 if(irank == 0){ \ 66 printf(PHASTAIO_PREFIX "irank=0: " s "\n", ##arg); \ 67 } \ 68 } while(0) 69 #else 70 #define phprintf( s, arg...) 71 #define phprintf_0( s, arg...) 72 #endif 73 74 enum PhastaIO_Errors 75 { 76 MAX_PHASTA_FILES_EXCEEDED = -1, 77 UNABLE_TO_OPEN_FILE = -2, 78 NOT_A_MPI_FILE = -3, 79 GPID_EXCEEDED = -4, 80 DATA_TYPE_ILLEGAL = -5, 81 }; 82 83 using namespace std; 84 85 namespace{ 86 87 typedef map<int, char*> mic; 88 mic LastHeaderKey; 89 vector< FILE* > fileArray; 90 vector< bool > byte_order; 91 vector< int > header_type; 92 int DataSize=0; 93 bool LastHeaderNotFound = false; 94 bool Wrong_Endian = false ; 95 bool Strict_Error = false ; 96 bool binary_format = true; 97 98 /***********************************************************************/ 99 /***************** NEW PHASTA IO CODE STARTS HERE **********************/ 100 /***********************************************************************/ 101 102 typedef struct 103 { 104 char filename[MAX_PHASTA_FILE_NAME_LENGTH]; /* defafults to 1024 */ 105 unsigned long long my_offset; 106 unsigned long long next_start_address; 107 unsigned long long **my_offset_table; 108 unsigned long long **my_read_table; 109 110 double * double_chunk; 111 double * read_double_chunk; 112 113 int field_count; 114 int part_count; 115 int read_field_count; 116 int read_part_count; 117 int GPid; 118 int start_id; 119 120 int mhsize; 121 122 int myrank; 123 int numprocs; 124 int local_myrank; 125 int local_numprocs; 126 127 int nppp; 128 int nPPF; 129 int nFiles; 130 int nFields; 131 132 int * int_chunk; 133 int * read_int_chunk; 134 135 int Wrong_Endian; /* default to false */ 136 char * master_header; 137 MPI_File file_handle; 138 MPI_Comm local_comm; 139 } phastaio_file_t; 140 141 typedef struct 142 { 143 //unsigned long long my_offset; 144 //unsigned long long **offset_table; 145 //int fileID; 146 int nppf, nfields; 147 //int GPid; 148 //int read_field_count; 149 char * masterHeader; 150 }serial_file; 151 152 serial_file *SerialFile; 153 phastaio_file_t *PhastaIOActiveFiles[MAX_PHASTA_FILES]; 154 int PhastaIONextActiveIndex = 0; /* indicates next index to allocate */ 155 156 // the caller has the responsibility to delete the returned string 157 // TODO: StringStipper("nbc value? ") returns NULL? 158 char* 159 StringStripper( const char istring[] ) { 160 161 int length = strlen( istring ); 162 163 //char* dest = new char [ length + 1 ]; 164 char* dest = (char *)malloc( length + 1 ); 165 166 //char * dest; 167 //dest = (char *)malloc( length + 1 + pool_align ); 168 //if (dest & 0x0F != 0) printf("not aligned!!!!\n"); 169 //mem_address = (long long )dest; 170 //if( mem_address & (pool_align -1) ) 171 // dest += pool_align - (mem_address & (pool_align -1)); 172 173 strcpy( dest, istring ); 174 dest[ length ] = '\0'; 175 176 if ( char* p = strpbrk( dest, " ") ) 177 *p = '\0'; 178 179 return dest; 180 } 181 182 inline int 183 cscompare( const char teststring[], 184 const char targetstring[] ) { 185 186 char* s1 = const_cast<char*>(teststring); 187 char* s2 = const_cast<char*>(targetstring); 188 189 while( *s1 == ' ') s1++; 190 while( *s2 == ' ') s2++; 191 while( ( *s1 ) 192 && ( *s2 ) 193 && ( *s2 != '?') 194 && ( tolower( *s1 )==tolower( *s2 ) ) ) { 195 s1++; 196 s2++; 197 while( *s1 == ' ') s1++; 198 while( *s2 == ' ') s2++; 199 } 200 if ( !( *s1 ) || ( *s1 == '?') ) return 1; 201 else return 0; 202 } 203 204 inline void 205 isBinary( const char iotype[] ) { 206 207 char* fname = StringStripper( iotype ); 208 if ( cscompare( fname, "binary" ) ) binary_format = true; 209 else binary_format = false; 210 free (fname); 211 212 } 213 214 inline size_t 215 typeSize( const char typestring[] ) { 216 217 char* ts1 = StringStripper( typestring ); 218 219 if ( cscompare( "integer", ts1 ) ) { 220 free (ts1); 221 return sizeof(int); 222 } else if ( cscompare( "double", ts1 ) ) { 223 free (ts1); 224 return sizeof( double ); 225 } else { 226 free (ts1); 227 fprintf(stderr,"unknown type : %s\n",ts1); 228 return 0; 229 } 230 } 231 232 int 233 readHeader( FILE* fileObject, 234 const char phrase[], 235 int* params, 236 int expect ) { 237 238 char* text_header; 239 char* token; 240 char Line[1024]; 241 char junk; 242 bool FOUND = false ; 243 int real_length; 244 int skip_size, integer_value; 245 int rewind_count=0; 246 247 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 248 rewind( fileObject ); 249 clearerr( fileObject ); 250 rewind_count++; 251 fgets( Line, 1024, fileObject ); 252 } 253 254 while( !FOUND && ( rewind_count < 2 ) ) { 255 if ( ( Line[0] != '\n' ) && ( real_length = strcspn( Line, "#" )) ){ 256 //text_header = new char [ real_length + 1 ]; 257 text_header = (char *)malloc( real_length + 1 ); 258 //text_header = (char *)malloc( real_length + 1 + pool_align ); 259 //mem_address = (long long )text_header; 260 //if( mem_address & (pool_align -1) ) 261 // text_header += pool_align - (mem_address & (pool_align -1)); 262 263 strncpy( text_header, Line, real_length ); 264 text_header[ real_length ] =static_cast<char>(NULL); 265 token = strtok ( text_header, ":" ); 266 //if( cscompare( phrase , token ) ) { 267 // Double comparison required because different fields can still start 268 // with the same name for mixed meshes (nbc code, nbc values, etc). 269 // Would be easy to fix cscompare instead but would it break sth else? 270 if( cscompare( phrase , token ) && cscompare( token , phrase ) ) { 271 FOUND = true ; 272 token = strtok( NULL, " ,;<>" ); 273 skip_size = atoi( token ); 274 int i; 275 for( i=0; i < expect && ( token = strtok( NULL," ,;<>") ); i++) { 276 params[i] = atoi( token ); 277 } 278 if ( i < expect ) { 279 fprintf(stderr,"Aloha Expected # of ints not found for: %s\n",phrase ); 280 } 281 } else if ( cscompare(token,"byteorder magic number") ) { 282 if ( binary_format ) { 283 fread((void*)&integer_value,sizeof(int),1,fileObject); 284 fread( &junk, sizeof(char), 1 , fileObject ); 285 if ( 362436 != integer_value ) Wrong_Endian = true; 286 } else{ 287 fscanf(fileObject, "%d\n", &integer_value ); 288 } 289 } else { 290 /* some other header, so just skip over */ 291 token = strtok( NULL, " ,;<>" ); 292 skip_size = atoi( token ); 293 if ( binary_format) 294 fseek( fileObject, skip_size, SEEK_CUR ); 295 else 296 for( int gama=0; gama < skip_size; gama++ ) 297 fgets( Line, 1024, fileObject ); 298 } 299 free (text_header); 300 } // end of if before while loop 301 302 if ( !FOUND ) 303 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 304 rewind( fileObject ); 305 clearerr( fileObject ); 306 rewind_count++; 307 fgets( Line, 1024, fileObject ); 308 } 309 } 310 311 if ( !FOUND ) { 312 //fprintf(stderr, "Error: Could not find: %s\n", phrase); 313 if(irank == 0) printf("WARNING: Could not find: %s\n", phrase); 314 return 1; 315 } 316 return 0; 317 } 318 319 } // end unnamed namespace 320 321 322 // begin of publicly visible functions 323 324 /** 325 * This function takes a long long pointer and assign (start) phiotmrc value to it 326 */ 327 //void startTimer(unsigned long long* start) { 328 void startTimer(double* start) { 329 330 if( !PRINT_PERF ) return; 331 332 MPI_Barrier(MPI_COMM_WORLD); 333 *start = phiotmrc(); 334 } 335 336 /** 337 * This function takes a long long pointer and assign (end) phiotmrc value to it 338 */ 339 void endTimer(double* end) { 340 341 if( !PRINT_PERF ) return; 342 343 *end = phiotmrc(); 344 MPI_Barrier(MPI_COMM_WORLD); 345 } 346 347 /** 348 * choose to print some performance results (or not) according to 349 * the PRINT_PERF macro 350 */ 351 void printPerf( 352 const char* func_name, 353 double start, 354 double end, 355 unsigned long long datasize, 356 int printdatainfo, 357 const char* extra_msg) { 358 359 if( !PRINT_PERF ) return; 360 361 unsigned long long data_size = datasize; 362 363 double time = end - start; 364 365 unsigned long long isizemin,isizemax,isizetot; 366 double sizemin,sizemax,sizeavg,sizetot,rate; 367 double tmin, tmax, tavg, ttot; 368 369 MPI_Allreduce(&time, &tmin,1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 370 MPI_Allreduce(&time, &tmax,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 371 MPI_Allreduce(&time, &ttot,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 372 tavg = ttot/mysize; 373 374 if(irank == 0) { 375 if ( PhastaIONextActiveIndex == 0 ) printf("** 1PFPP "); 376 else printf("** syncIO "); 377 printf("%s(): Tmax = %f sec, Tmin = %f sec, Tavg = %f sec", func_name, tmax, tmin, tavg); 378 } 379 380 if(printdatainfo == 1) { // if printdatainfo ==1, compute I/O rate and block size 381 MPI_Allreduce(&data_size,&isizemin,1,MPI_LONG_LONG_INT,MPI_MIN,MPI_COMM_WORLD); 382 MPI_Allreduce(&data_size,&isizemax,1,MPI_LONG_LONG_INT,MPI_MAX,MPI_COMM_WORLD); 383 MPI_Allreduce(&data_size,&isizetot,1,MPI_LONG_LONG_INT,MPI_SUM,MPI_COMM_WORLD); 384 385 sizemin=(double)(isizemin*inv1024sq); 386 sizemax=(double)(isizemax*inv1024sq); 387 sizetot=(double)(isizetot*inv1024sq); 388 sizeavg=(double)(1.0*sizetot/mysize); 389 rate=(double)(1.0*sizetot/tmax); 390 391 if( irank == 0) { 392 printf(", Rate = %f MB/s [%s] \n \t\t\t block size: Min= %f MB; Max= %f MB; Avg= %f MB; Tot= %f MB\n", rate, extra_msg, sizemin,sizemax,sizeavg,sizetot); 393 } 394 } 395 else { 396 if(irank == 0) { 397 printf(" \n"); 398 //printf(" (%s) \n", extra_msg); 399 } 400 } 401 } 402 403 /** 404 * This function is normally called at the beginning of a read operation, before 405 * init function. 406 * This function (uses rank 0) reads out nfields, nppf, master header size, 407 * endianess and allocates for masterHeader string. 408 * These values are essential for following read operations. Rank 0 will bcast 409 * these values to other ranks in the commm world 410 * 411 * If the file set is of old POSIX format, it would throw error and exit 412 */ 413 void queryphmpiio(const char filename[],int *nfields, int *nppf) 414 { 415 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 416 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 417 418 if(irank == 0) { 419 FILE * fileHandle; 420 char* fname = StringStripper( filename ); 421 422 fileHandle = fopen (fname,"rb"); 423 if (fileHandle == NULL ) { 424 printf("\nError: File %s doesn't exist! Please check!\n",fname); 425 } 426 else { 427 SerialFile =(serial_file *)calloc( 1, sizeof( serial_file) ); 428 //SerialFile = (serial_file *)malloc( sizeof( serial_file ) + pool_align ); 429 //mem_address = (long long )SerialFile; 430 //if( mem_address & (pool_align -1) ) 431 // SerialFile += pool_align - (mem_address & (pool_align -1)); 432 433 //SerialFile->masterHeader = (char *)malloc(MasterHeaderSize); 434 //int meta_size_limit = 4*ONE_MEGABYTE; 435 //int meta_size_limit = (4+ MAX_FIELDS_NUMBER ) * MAX_FIELDS_NAME_LENGTH; 436 int meta_size_limit = VERSION_INFO_HEADER_SIZE; 437 SerialFile->masterHeader = (char *)malloc( meta_size_limit ); 438 //SerialFile->masterHeader = (char *)malloc( meta_size_limit + pool_align ); 439 //mem_address = (long long )SerialFile; 440 //if( mem_address & (pool_align -1) ) 441 // SerialFile->masterHeader += pool_align - (mem_address & (pool_align -1)); 442 443 fread(SerialFile->masterHeader, 1, meta_size_limit, fileHandle); 444 445 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 446 char version[MAX_FIELDS_NAME_LENGTH/4]; 447 int mhsize; 448 char * token; 449 int magic_number; 450 451 memcpy( read_out_tag, 452 SerialFile->masterHeader, 453 MAX_FIELDS_NAME_LENGTH-1 ); 454 455 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) { 456 // Test endianess ... 457 memcpy (&magic_number, 458 SerialFile->masterHeader + sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 459 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 460 461 if ( magic_number != ENDIAN_TEST_NUMBER ) { 462 printf("Endian is different!\n"); 463 // Will do swap later 464 } 465 466 // test version, old version, default masterheader size is 4M 467 // newer version masterheader size is read from first line 468 memcpy(version, 469 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/2, 470 MAX_FIELDS_NAME_LENGTH/4 - 1); //TODO: why -1? 471 472 if( cscompare ("version",version) ) { 473 // if there is "version" tag in the file, then it is newer format 474 // read master header size from here, otherwise use default 475 // Note: if version is "1", we know mhsize is at 3/4 place... 476 477 token = strtok(version, ":"); 478 token = strtok(NULL, " ,;<>" ); 479 int iversion = atoi(token); 480 481 if( iversion == 1) { 482 memcpy( &mhsize, 483 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/4*3 + sizeof("mhsize : ")-1, 484 sizeof(int)); 485 if ( magic_number != ENDIAN_TEST_NUMBER ) 486 SwapArrayByteOrder(&mhsize, sizeof(int), 1); 487 488 if( mhsize > DefaultMHSize ) { 489 //if actual headersize is larger than default, let's re-read 490 free(SerialFile->masterHeader); 491 SerialFile->masterHeader = (char *)malloc(mhsize); 492 fseek(fileHandle, 0, SEEK_SET); // reset the file stream position 493 fread(SerialFile->masterHeader,1,mhsize,fileHandle); 494 } 495 } 496 //TODO: check if this is a valid int?? 497 MasterHeaderSize = mhsize; 498 } 499 else { // else it's version 0's format w/o version tag, implicating MHSize=4M 500 MasterHeaderSize = DefaultMHSize; 501 } 502 503 memcpy( read_out_tag, 504 SerialFile->masterHeader+MAX_FIELDS_NAME_LENGTH+1, 505 MAX_FIELDS_NAME_LENGTH ); //TODO: why +1 506 507 // Read in # fields ... 508 token = strtok( read_out_tag, ":" ); 509 token = strtok( NULL," ,;<>" ); 510 *nfields = atoi( token ); 511 if ( *nfields > MAX_FIELDS_NUMBER) { 512 printf("Error queryphmpiio: nfields is larger than MAX_FIELDS_NUMBER!\n"); 513 } 514 SerialFile->nfields=*nfields; //TODO: sanity check of this int? 515 516 memcpy( read_out_tag, 517 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH * 2 518 + *nfields * MAX_FIELDS_NAME_LENGTH, 519 MAX_FIELDS_NAME_LENGTH); 520 521 token = strtok( read_out_tag, ":" ); 522 token = strtok( NULL," ,;<>" ); 523 *nppf = atoi( token ); 524 SerialFile->nppf=*nppf; //TODO: sanity check of int 525 } // end of if("MPI_IO_TAG") 526 else { 527 printf("Error queryphmpiio: The file you opened is not of syncIO new format, please check! read_out_tag = %s\n",read_out_tag); 528 exit(1); 529 } 530 fclose(fileHandle); 531 free(SerialFile->masterHeader); 532 free(SerialFile); 533 } //end of else 534 free(fname); 535 } 536 537 // Bcast value to every one 538 MPI_Bcast( nfields, 1, MPI_INT, 0, MPI_COMM_WORLD ); 539 MPI_Bcast( nppf, 1, MPI_INT, 0, MPI_COMM_WORLD ); 540 MPI_Bcast( &MasterHeaderSize, 1, MPI_INT, 0, MPI_COMM_WORLD ); 541 phprintf("Info queryphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 542 } 543 544 /** 545 * This function computes the right master header size (round to size of 2^n). 546 * This is only needed for file format version 1 in "write" mode. 547 */ 548 int computeMHSize(int nfields, int nppf, int version) { 549 int mhsize; 550 if(version == 1) { 551 //int meta_info_size = (2+nfields+1) * MAX_FIELDS_NAME_LENGTH; // 2 is MPI_IO_TAG and nFields, the others 1 is nppf 552 int meta_info_size = VERSION_INFO_HEADER_SIZE; 553 int actual_size = nfields * nppf * sizeof(long long) + meta_info_size; 554 //printf("actual_size = %d, offset table size = %d\n", actual_size, nfields * nppf * sizeof(long long)); 555 if (actual_size > DefaultMHSize) { 556 mhsize = (int) ceil( (double) actual_size/DefaultMHSize); // it's rounded to ceiling of this value 557 mhsize *= DefaultMHSize; 558 } 559 else { 560 mhsize = DefaultMHSize; 561 } 562 } 563 return mhsize; 564 } 565 566 /** 567 * Computes correct color of a rank according to number of files. 568 */ 569 extern "C" int computeColor( int myrank, int numprocs, int nfiles) { 570 int color = 571 (int)(myrank / (numprocs / nfiles)); 572 return color; 573 } 574 575 576 /** 577 * Check the file descriptor. 578 */ 579 void checkFileDescriptor(const char fctname[], 580 int* fileDescriptor ) { 581 if ( *fileDescriptor < 0 ) { 582 printf("Error: File descriptor = %d in %s\n",*fileDescriptor,fctname); 583 exit(1); 584 } 585 } 586 587 /** 588 * Initialize the file struct members and allocate space for file struct 589 * buffers. 590 * 591 * Note: this function is only called when we are using new format. Old POSIX 592 * format should skip this routine and call openfile() directly instead. 593 */ 594 int initphmpiio( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[]) 595 { 596 // we init irank again in case query not called (e.g. syncIO write case) 597 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 598 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 599 600 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 601 602 double timer_start, timer_end; 603 startTimer(&timer_start); 604 605 char* imode = StringStripper( mode ); 606 607 // Note: if it's read, we presume query was called prior to init and 608 // MasterHeaderSize is already set to correct value from parsing header 609 // otherwise it's write then it needs some computation to be set 610 if ( cscompare( "read", imode ) ) { 611 // do nothing 612 } 613 else if( cscompare( "write", imode ) ) { 614 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 615 } 616 else { 617 printf("Error initphmpiio: can't recognize the mode %s", imode); 618 exit(1); 619 } 620 free ( imode ); 621 622 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 623 624 int i, j; 625 626 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 627 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 628 endTimer(&timer_end); 629 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 630 return MAX_PHASTA_FILES_EXCEEDED; 631 } 632 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 633 // { 634 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 635 // { 636 // PhastaIOActiveFiles[i] = NULL; 637 // } 638 // } 639 640 641 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 642 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 643 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 644 //if( mem_address & (pool_align -1) ) 645 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 646 647 i = PhastaIONextActiveIndex; 648 PhastaIONextActiveIndex++; 649 650 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 651 652 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 653 654 PhastaIOActiveFiles[i]->Wrong_Endian = false; 655 656 PhastaIOActiveFiles[i]->nFields = *nfields; 657 PhastaIOActiveFiles[i]->nPPF = *nppf; 658 PhastaIOActiveFiles[i]->nFiles = *nfiles; 659 MPI_Comm_rank(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->myrank)); 660 MPI_Comm_size(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->numprocs)); 661 662 663 if( *nfiles > 1 ) { // split the ranks according to each mpiio file 664 665 if ( s_assign_local_comm == 0) { // call mpi_comm_split for the first (and only) time 666 667 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Building subcommunicator\n"); 668 669 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 670 MPI_Comm_split(MPI_COMM_WORLD, 671 color, 672 PhastaIOActiveFiles[i]->myrank, 673 &(PhastaIOActiveFiles[i]->local_comm)); 674 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 675 &(PhastaIOActiveFiles[i]->local_numprocs)); 676 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 677 &(PhastaIOActiveFiles[i]->local_myrank)); 678 679 // back up now these variables so that we do not need to call comm_split again 680 s_local_comm = PhastaIOActiveFiles[i]->local_comm; 681 s_local_size = PhastaIOActiveFiles[i]->local_numprocs; 682 s_local_rank = PhastaIOActiveFiles[i]->local_myrank; 683 s_assign_local_comm = 1; 684 } 685 else { // recycle the subcommunicator 686 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Recycling subcommunicator\n"); 687 PhastaIOActiveFiles[i]->local_comm = s_local_comm; 688 PhastaIOActiveFiles[i]->local_numprocs = s_local_size; 689 PhastaIOActiveFiles[i]->local_myrank = s_local_rank; 690 } 691 } 692 else { // *nfiles == 1 here - no need to call mpi_comm_split here 693 694 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Bypassing subcommunicator\n"); 695 PhastaIOActiveFiles[i]->local_comm = MPI_COMM_WORLD; 696 PhastaIOActiveFiles[i]->local_numprocs = PhastaIOActiveFiles[i]->numprocs; 697 PhastaIOActiveFiles[i]->local_myrank = PhastaIOActiveFiles[i]->myrank; 698 699 } 700 701 PhastaIOActiveFiles[i]->nppp = 702 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 703 704 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 705 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 706 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 707 708 PhastaIOActiveFiles[i]->my_offset_table = 709 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 710 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 711 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 712 //if( mem_address & (pool_align -1) ) 713 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 714 715 PhastaIOActiveFiles[i]->my_read_table = 716 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 717 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 718 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 719 //if( mem_address & (pool_align -1) ) 720 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 721 722 for (j=0; j<*nfields; j++) 723 { 724 PhastaIOActiveFiles[i]->my_offset_table[j] = 725 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 726 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 727 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 728 //if( mem_address & (pool_align -1) ) 729 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 730 731 PhastaIOActiveFiles[i]->my_read_table[j] = 732 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 733 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 734 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 735 //if( mem_address & (pool_align -1) ) 736 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 737 } 738 *filehandle = i; 739 740 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 741 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 742 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 743 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 744 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 745 746 /* 747 PhastaIOActiveFiles[i]->master_header = 748 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 749 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 750 if( mem_address & (pool_align -1) ) 751 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 752 753 PhastaIOActiveFiles[i]->double_chunk = 754 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 755 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 756 if( mem_address & (pool_align -1) ) 757 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 758 759 PhastaIOActiveFiles[i]->int_chunk = 760 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 761 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 762 if( mem_address & (pool_align -1) ) 763 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 764 765 PhastaIOActiveFiles[i]->read_double_chunk = 766 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 767 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 768 if( mem_address & (pool_align -1) ) 769 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 770 771 PhastaIOActiveFiles[i]->read_int_chunk = 772 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 773 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 774 if( mem_address & (pool_align -1) ) 775 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 776 */ 777 778 // Time monitoring 779 endTimer(&timer_end); 780 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 781 782 phprintf_0("Info initphmpiio: quiting function"); 783 784 return i; 785 } 786 787 /** 788 * Destruct the file struct and free buffers allocated in init function. 789 */ 790 void finalizephmpiio( int *fileDescriptor ) 791 { 792 double timer_start, timer_end; 793 startTimer(&timer_start); 794 795 int i, j; 796 i = *fileDescriptor; 797 //PhastaIONextActiveIndex--; 798 799 /* //free the offset table for this phasta file */ 800 //for(j=0; j<MAX_FIELDS_NUMBER; j++) //Danger: undefined behavior for my_*_table.[j] not allocated or not initialized to NULL 801 for(j=0; j<PhastaIOActiveFiles[i]->nFields; j++) 802 { 803 free( PhastaIOActiveFiles[i]->my_offset_table[j]); 804 free( PhastaIOActiveFiles[i]->my_read_table[j]); 805 } 806 free ( PhastaIOActiveFiles[i]->my_offset_table ); 807 free ( PhastaIOActiveFiles[i]->my_read_table ); 808 free ( PhastaIOActiveFiles[i]->master_header ); 809 free ( PhastaIOActiveFiles[i]->double_chunk ); 810 free ( PhastaIOActiveFiles[i]->int_chunk ); 811 free ( PhastaIOActiveFiles[i]->read_double_chunk ); 812 free ( PhastaIOActiveFiles[i]->read_int_chunk ); 813 814 free( PhastaIOActiveFiles[i]); 815 816 endTimer(&timer_end); 817 printPerf("finalizempiio", timer_start, timer_end, 0, 0, ""); 818 819 PhastaIONextActiveIndex--; 820 } 821 822 823 /** 824 * Special init for M2N in order to create a subcommunicator for the reduced solution (requires PRINT_PERF to be false for now) 825 * Initialize the file struct members and allocate space for file struct buffers. 826 * 827 * Note: this function is only called when we are using new format. Old POSIX 828 * format should skip this routine and call openfile() directly instead. 829 */ 830 int initphmpiiosub( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[],MPI_Comm my_local_comm) 831 { 832 // we init irank again in case query not called (e.g. syncIO write case) 833 834 MPI_Comm_rank(my_local_comm, &irank); 835 MPI_Comm_size(my_local_comm, &mysize); 836 837 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 838 839 double timer_start, timer_end; 840 startTimer(&timer_start); 841 842 char* imode = StringStripper( mode ); 843 844 // Note: if it's read, we presume query was called prior to init and 845 // MasterHeaderSize is already set to correct value from parsing header 846 // otherwise it's write then it needs some computation to be set 847 if ( cscompare( "read", imode ) ) { 848 // do nothing 849 } 850 else if( cscompare( "write", imode ) ) { 851 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 852 } 853 else { 854 printf("Error initphmpiio: can't recognize the mode %s", imode); 855 exit(1); 856 } 857 free ( imode ); 858 859 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 860 861 int i, j; 862 863 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 864 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 865 endTimer(&timer_end); 866 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 867 return MAX_PHASTA_FILES_EXCEEDED; 868 } 869 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 870 // { 871 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 872 // { 873 // PhastaIOActiveFiles[i] = NULL; 874 // } 875 // } 876 877 878 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 879 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 880 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 881 //if( mem_address & (pool_align -1) ) 882 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 883 884 i = PhastaIONextActiveIndex; 885 PhastaIONextActiveIndex++; 886 887 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 888 889 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 890 891 PhastaIOActiveFiles[i]->Wrong_Endian = false; 892 893 PhastaIOActiveFiles[i]->nFields = *nfields; 894 PhastaIOActiveFiles[i]->nPPF = *nppf; 895 PhastaIOActiveFiles[i]->nFiles = *nfiles; 896 MPI_Comm_rank(my_local_comm, &(PhastaIOActiveFiles[i]->myrank)); 897 MPI_Comm_size(my_local_comm, &(PhastaIOActiveFiles[i]->numprocs)); 898 899 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 900 MPI_Comm_split(my_local_comm, 901 color, 902 PhastaIOActiveFiles[i]->myrank, 903 &(PhastaIOActiveFiles[i]->local_comm)); 904 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 905 &(PhastaIOActiveFiles[i]->local_numprocs)); 906 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 907 &(PhastaIOActiveFiles[i]->local_myrank)); 908 PhastaIOActiveFiles[i]->nppp = 909 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 910 911 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 912 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 913 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 914 915 PhastaIOActiveFiles[i]->my_offset_table = 916 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 917 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 918 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 919 //if( mem_address & (pool_align -1) ) 920 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 921 922 PhastaIOActiveFiles[i]->my_read_table = 923 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 924 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 925 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 926 //if( mem_address & (pool_align -1) ) 927 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 928 929 for (j=0; j<*nfields; j++) 930 { 931 PhastaIOActiveFiles[i]->my_offset_table[j] = 932 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 933 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 934 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 935 //if( mem_address & (pool_align -1) ) 936 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 937 938 PhastaIOActiveFiles[i]->my_read_table[j] = 939 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 940 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 941 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 942 //if( mem_address & (pool_align -1) ) 943 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 944 } 945 *filehandle = i; 946 947 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 948 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 949 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 950 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 951 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 952 953 /* 954 PhastaIOActiveFiles[i]->master_header = 955 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 956 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 957 if( mem_address & (pool_align -1) ) 958 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 959 960 PhastaIOActiveFiles[i]->double_chunk = 961 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 962 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 963 if( mem_address & (pool_align -1) ) 964 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 965 966 PhastaIOActiveFiles[i]->int_chunk = 967 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 968 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 969 if( mem_address & (pool_align -1) ) 970 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 971 972 PhastaIOActiveFiles[i]->read_double_chunk = 973 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 974 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 975 if( mem_address & (pool_align -1) ) 976 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 977 978 PhastaIOActiveFiles[i]->read_int_chunk = 979 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 980 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 981 if( mem_address & (pool_align -1) ) 982 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 983 */ 984 985 // Time monitoring 986 endTimer(&timer_end); 987 printPerf("initphmpiiosub", timer_start, timer_end, 0, 0, ""); 988 989 phprintf_0("Info initphmpiiosub: quiting function"); 990 991 return i; 992 } 993 994 995 996 /** open file for both POSIX and MPI-IO syncIO format. 997 * 998 * If it's old POSIX format, simply call posix fopen(). 999 * 1000 * If it's MPI-IO foramt: 1001 * in "read" mode, it builds the header table that points to the offset of 1002 * fields for parts; 1003 * in "write" mode, it opens the file with MPI-IO open routine. 1004 */ 1005 void openfile(const char filename[], 1006 const char mode[], 1007 int* fileDescriptor ) 1008 { 1009 phprintf_0("Info: entering openfile"); 1010 1011 double timer_start, timer_end; 1012 startTimer(&timer_start); 1013 1014 if ( PhastaIONextActiveIndex == 0 ) 1015 { 1016 FILE* file=NULL ; 1017 *fileDescriptor = 0; 1018 char* fname = StringStripper( filename ); 1019 char* imode = StringStripper( mode ); 1020 1021 if ( cscompare( "read", imode ) ) file = fopen(fname, "rb" ); 1022 else if( cscompare( "write", imode ) ) file = fopen(fname, "wb" ); 1023 else if( cscompare( "append", imode ) ) file = fopen(fname, "ab" ); 1024 1025 if ( !file ){ 1026 fprintf(stderr,"Error openfile: unable to open file %s",fname ) ; 1027 } else { 1028 fileArray.push_back( file ); 1029 byte_order.push_back( false ); 1030 header_type.push_back( sizeof(int) ); 1031 *fileDescriptor = fileArray.size(); 1032 } 1033 free (fname); 1034 free (imode); 1035 } 1036 else // else it would be parallel I/O, opposed to posix io 1037 { 1038 char* fname = StringStripper( filename ); 1039 char* imode = StringStripper( mode ); 1040 int rc; 1041 int i = *fileDescriptor; 1042 checkFileDescriptor("openfile",&i); 1043 char* token; 1044 1045 if ( cscompare( "read", imode ) ) 1046 { 1047 // if (PhastaIOActiveFiles[i]->myrank == 0) 1048 // printf("\n **********\nRead open ... ... regular version\n"); 1049 1050 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1051 fname, 1052 MPI_MODE_RDONLY, 1053 MPI_INFO_NULL, 1054 &(PhastaIOActiveFiles[i]->file_handle) ); 1055 1056 if(rc) 1057 { 1058 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1059 printf("Error openfile: Unable to open file %s! File descriptor = %d\n",fname,*fileDescriptor); 1060 endTimer(&timer_end); 1061 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1062 return; 1063 } 1064 1065 MPI_Status read_tag_status; 1066 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1067 int j; 1068 int magic_number; 1069 1070 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1071 MPI_File_read_at( PhastaIOActiveFiles[i]->file_handle, 1072 0, 1073 PhastaIOActiveFiles[i]->master_header, 1074 MasterHeaderSize, 1075 MPI_CHAR, 1076 &read_tag_status ); 1077 } 1078 1079 MPI_Bcast( PhastaIOActiveFiles[i]->master_header, 1080 MasterHeaderSize, 1081 MPI_CHAR, 1082 0, 1083 PhastaIOActiveFiles[i]->local_comm ); 1084 1085 memcpy( read_out_tag, 1086 PhastaIOActiveFiles[i]->master_header, 1087 MAX_FIELDS_NAME_LENGTH-1 ); 1088 1089 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) 1090 { 1091 // Test endianess ... 1092 memcpy ( &magic_number, 1093 PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1094 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 1095 1096 if ( magic_number != ENDIAN_TEST_NUMBER ) 1097 { 1098 PhastaIOActiveFiles[i]->Wrong_Endian = true; 1099 } 1100 1101 memcpy( read_out_tag, 1102 PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH+1, // TODO: WHY +1??? 1103 MAX_FIELDS_NAME_LENGTH ); 1104 1105 // Read in # fields ... 1106 token = strtok ( read_out_tag, ":" ); 1107 token = strtok( NULL," ,;<>" ); 1108 PhastaIOActiveFiles[i]->nFields = atoi( token ); 1109 1110 unsigned long long **header_table; 1111 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1112 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1113 //mem_address = (long long )header_table; 1114 //if( mem_address & (pool_align -1) ) 1115 // header_table += pool_align - (mem_address & (pool_align -1)); 1116 1117 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1118 { 1119 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1120 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof(unsigned long long *)); 1121 //mem_address = (long long )header_table[j]; 1122 //if( mem_address & (pool_align -1) ) 1123 // header_table[j] += pool_align - (mem_address & (pool_align -1)); 1124 } 1125 1126 // Read in the offset table ... 1127 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1128 { 1129 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1130 memcpy( header_table[j], 1131 PhastaIOActiveFiles[i]->master_header + 1132 VERSION_INFO_HEADER_SIZE + 1133 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1134 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1135 } 1136 1137 MPI_Scatter( header_table[j], 1138 PhastaIOActiveFiles[i]->nppp, 1139 MPI_LONG_LONG_INT, 1140 PhastaIOActiveFiles[i]->my_read_table[j], 1141 PhastaIOActiveFiles[i]->nppp, 1142 MPI_LONG_LONG_INT, 1143 0, 1144 PhastaIOActiveFiles[i]->local_comm ); 1145 1146 // Swap byte order if endianess is different ... 1147 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) { 1148 SwapArrayByteOrder( PhastaIOActiveFiles[i]->my_read_table[j], 1149 sizeof(long long int), 1150 PhastaIOActiveFiles[i]->nppp ); 1151 } 1152 } 1153 1154 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1155 free ( header_table[j] ); 1156 } 1157 free (header_table); 1158 1159 } // end of if MPI_IO_TAG 1160 else //else not valid MPI file 1161 { 1162 *fileDescriptor = NOT_A_MPI_FILE; 1163 printf("Error openfile: The file %s you opened is not in syncIO new format, please check again! File descriptor = %d, MasterHeaderSize = %d, read_out_tag = %s\n",fname,*fileDescriptor,MasterHeaderSize,read_out_tag); 1164 //Printing MasterHeaderSize is useful to test a compiler bug on Intrepid BGP 1165 endTimer(&timer_end); 1166 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1167 return; 1168 } 1169 } // end of if "read" 1170 else if( cscompare( "write", imode ) ) 1171 { 1172 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1173 fname, 1174 MPI_MODE_WRONLY | MPI_MODE_CREATE, 1175 MPI_INFO_NULL, 1176 &(PhastaIOActiveFiles[i]->file_handle) ); 1177 if(rc) 1178 { 1179 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1180 return; 1181 } 1182 } // end of if "write" 1183 free (fname); 1184 free (imode); 1185 } // end of if FileIndex != 0 1186 1187 endTimer(&timer_end); 1188 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1189 } 1190 1191 /** close file for both POSIX and MPI-IO syncIO format. 1192 * 1193 * If it's old POSIX format, simply call posix fclose(). 1194 * 1195 * If it's MPI-IO foramt: 1196 * in "read" mode, it simply close file with MPI-IO close routine. 1197 * in "write" mode, rank 0 in each group will re-assemble the master header and 1198 * offset table and write to the beginning of file, then close the file. 1199 */ 1200 void closefile( int* fileDescriptor, 1201 const char mode[] ) 1202 { 1203 double timer_start, timer_end; 1204 startTimer(&timer_start); 1205 1206 int i = *fileDescriptor; 1207 checkFileDescriptor("closefile",&i); 1208 1209 if ( PhastaIONextActiveIndex == 0 ) { 1210 char* imode = StringStripper( mode ); 1211 1212 if( cscompare( "write", imode ) 1213 || cscompare( "append", imode ) ) { 1214 fflush( fileArray[ *fileDescriptor - 1 ] ); 1215 } 1216 1217 fclose( fileArray[ *fileDescriptor - 1 ] ); 1218 free (imode); 1219 } 1220 else { 1221 char* imode = StringStripper( mode ); 1222 1223 //write master header here: 1224 if ( cscompare( "write", imode ) ) { 1225 // if ( PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields < 2*ONE_MEGABYTE/8 ) //SHOULD BE CHECKED 1226 // MasterHeaderSize = 4*ONE_MEGABYTE; 1227 // else 1228 // MasterHeaderSize = 4*ONE_MEGABYTE + PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields * 8 - 2*ONE_MEGABYTE; 1229 1230 MasterHeaderSize = computeMHSize( PhastaIOActiveFiles[i]->nFields, PhastaIOActiveFiles[i]->nPPF, LATEST_WRITE_VERSION); 1231 phprintf_0("Info closefile: myrank = %d, MasterHeaderSize = %d\n", PhastaIOActiveFiles[i]->myrank, MasterHeaderSize); 1232 1233 MPI_Status write_header_status; 1234 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1235 char version[MAX_FIELDS_NAME_LENGTH/4]; 1236 char mhsize[MAX_FIELDS_NAME_LENGTH/4]; 1237 int magic_number = ENDIAN_TEST_NUMBER; 1238 1239 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) 1240 { 1241 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1242 sprintf(mpi_tag, "MPI_IO_Tag : "); 1243 memcpy(PhastaIOActiveFiles[i]->master_header, 1244 mpi_tag, 1245 MAX_FIELDS_NAME_LENGTH); 1246 1247 bzero((void*)version,MAX_FIELDS_NAME_LENGTH/4); 1248 // this version is "1", print version in ASCII 1249 sprintf(version, "version : %d",1); 1250 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/2, 1251 version, 1252 MAX_FIELDS_NAME_LENGTH/4); 1253 1254 // master header size is computed using the formula above 1255 bzero((void*)mhsize,MAX_FIELDS_NAME_LENGTH/4); 1256 sprintf(mhsize, "mhsize : "); 1257 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/4*3, 1258 mhsize, 1259 MAX_FIELDS_NAME_LENGTH/4); 1260 1261 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1262 sprintf(mpi_tag, 1263 "\nnFields : %d\n", 1264 PhastaIOActiveFiles[i]->nFields); 1265 memcpy(PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH, 1266 mpi_tag, 1267 MAX_FIELDS_NAME_LENGTH); 1268 1269 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1270 sprintf(mpi_tag, "\nnPPF : %d\n", PhastaIOActiveFiles[i]->nPPF); 1271 memcpy( PhastaIOActiveFiles[i]->master_header+ 1272 PhastaIOActiveFiles[i]->nFields * 1273 MAX_FIELDS_NAME_LENGTH + 1274 MAX_FIELDS_NAME_LENGTH * 2, 1275 mpi_tag, 1276 MAX_FIELDS_NAME_LENGTH); 1277 1278 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1279 &magic_number, 1280 sizeof(int)); 1281 1282 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("mhsize : ") -1 + MAX_FIELDS_NAME_LENGTH/4*3, 1283 &MasterHeaderSize, 1284 sizeof(int)); 1285 } 1286 1287 int j = 0; 1288 unsigned long long **header_table; 1289 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1290 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1291 //mem_address = (long long )header_table; 1292 //if( mem_address & (pool_align -1) ) 1293 // header_table += pool_align - (mem_address & (pool_align -1)); 1294 1295 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1296 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1297 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof (unsigned long long *)); 1298 //mem_address = (long long )header_table[j]; 1299 //if( mem_address & (pool_align -1) ) 1300 // header_table[j] += pool_align - (mem_address & (pool_align - 1)); 1301 } 1302 1303 //if( irank == 0 ) printf("gonna mpi_gather, myrank = %d\n", irank); 1304 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1305 MPI_Gather( PhastaIOActiveFiles[i]->my_offset_table[j], 1306 PhastaIOActiveFiles[i]->nppp, 1307 MPI_LONG_LONG_INT, 1308 header_table[j], 1309 PhastaIOActiveFiles[i]->nppp, 1310 MPI_LONG_LONG_INT, 1311 0, 1312 PhastaIOActiveFiles[i]->local_comm ); 1313 } 1314 1315 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1316 1317 //if( irank == 0 ) printf("gonna memcpy for every procs, myrank = %d\n", irank); 1318 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1319 memcpy ( PhastaIOActiveFiles[i]->master_header + 1320 VERSION_INFO_HEADER_SIZE + 1321 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1322 header_table[j], 1323 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1324 } 1325 1326 //if( irank == 0 ) printf("gonna file_write_at(), myrank = %d\n", irank); 1327 MPI_File_write_at( PhastaIOActiveFiles[i]->file_handle, 1328 0, 1329 PhastaIOActiveFiles[i]->master_header, 1330 MasterHeaderSize, 1331 MPI_CHAR, 1332 &write_header_status ); 1333 } 1334 1335 ////free(PhastaIOActiveFiles[i]->master_header); 1336 1337 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1338 free ( header_table[j] ); 1339 } 1340 free (header_table); 1341 } 1342 1343 //if( irank == 0 ) printf("gonna file_close(), myrank = %d\n", irank); 1344 MPI_File_close( &( PhastaIOActiveFiles[i]->file_handle ) ); 1345 free ( imode ); 1346 } 1347 1348 endTimer(&timer_end); 1349 printPerf("closefile_", timer_start, timer_end, 0, 0, ""); 1350 } 1351 1352 void readheader( int* fileDescriptor, 1353 const char keyphrase[], 1354 void* valueArray, 1355 int* nItems, 1356 const char datatype[], 1357 const char iotype[] ) 1358 { 1359 double timer_start, timer_end; 1360 1361 startTimer(&timer_start); 1362 1363 int i = *fileDescriptor; 1364 checkFileDescriptor("readheader",&i); 1365 1366 if ( PhastaIONextActiveIndex == 0 ) { 1367 int filePtr = *fileDescriptor - 1; 1368 FILE* fileObject; 1369 int* valueListInt; 1370 1371 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1372 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1373 fprintf(stderr,"openfile_ function has to be called before \n") ; 1374 fprintf(stderr,"acessing the file\n ") ; 1375 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1376 endTimer(&timer_end); 1377 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1378 return; 1379 } 1380 1381 if( LastHeaderKey.count(filePtr) ) 1382 free(LastHeaderKey[filePtr]); 1383 const int l = strlen(keyphrase)+1; 1384 LastHeaderKey[filePtr] = (char*) malloc(l*sizeof(char)); 1385 strcpy(LastHeaderKey[filePtr], keyphrase); 1386 LastHeaderNotFound = false; 1387 1388 fileObject = fileArray[ filePtr ] ; 1389 Wrong_Endian = byte_order[ filePtr ]; 1390 1391 isBinary( iotype ); 1392 typeSize( datatype ); //redundant call, just avoid a compiler warning. 1393 1394 // right now we are making the assumption that we will only write integers 1395 // on the header line. 1396 1397 valueListInt = static_cast< int* >( valueArray ); 1398 int ierr = readHeader( fileObject , 1399 keyphrase, 1400 valueListInt, 1401 *nItems ) ; 1402 1403 byte_order[ filePtr ] = Wrong_Endian ; 1404 1405 if ( ierr ) LastHeaderNotFound = true; 1406 1407 //return ; // don't return, go to the end to print perf 1408 } 1409 else { 1410 unsigned int skip_size; 1411 int* valueListInt; 1412 valueListInt = static_cast <int*>(valueArray); 1413 char* token = NULL; 1414 bool FOUND = false ; 1415 isBinary( iotype ); 1416 1417 MPI_Status read_offset_status; 1418 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1419 memset(read_out_tag, '\0', MAX_FIELDS_NAME_LENGTH); 1420 char readouttag[MAX_FIELDS_NUMBER][MAX_FIELDS_NAME_LENGTH]; 1421 int j; 1422 1423 int string_length = strlen( keyphrase ); 1424 char* buffer = (char*) malloc ( string_length+1 ); 1425 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1426 //mem_address = (long long )buffer; 1427 //if( mem_address & (pool_align -1) ) 1428 // buffer += pool_align - (mem_address & (pool_align -1)); 1429 1430 strcpy ( buffer, keyphrase ); 1431 buffer[ string_length ] = '\0'; 1432 1433 char* st2 = strtok ( buffer, "@" ); 1434 st2 = strtok (NULL, "@"); 1435 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1436 if ( char* p = strpbrk(buffer, "@") ) 1437 *p = '\0'; 1438 1439 // Check if the user has input the right GPid 1440 if ( ( PhastaIOActiveFiles[i]->GPid <= 1441 PhastaIOActiveFiles[i]->myrank * 1442 PhastaIOActiveFiles[i]->nppp )|| 1443 ( PhastaIOActiveFiles[i]->GPid > 1444 ( PhastaIOActiveFiles[i]->myrank + 1 ) * 1445 PhastaIOActiveFiles[i]->nppp ) ) 1446 { 1447 *fileDescriptor = NOT_A_MPI_FILE; 1448 printf("Error readheader: The file is not in syncIO new format, please check! myrank = %d, GPid = %d, nppp = %d, keyphrase = %s\n", PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->GPid, PhastaIOActiveFiles[i]->nppp, keyphrase); 1449 // It is possible atoi could not generate a clear integer from st2 because of additional garbage character in keyphrase 1450 endTimer(&timer_end); 1451 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1452 return; 1453 } 1454 1455 // Find the field we want ... 1456 //for ( j = 0; j<MAX_FIELDS_NUMBER; j++ ) 1457 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1458 { 1459 memcpy( readouttag[j], 1460 PhastaIOActiveFiles[i]->master_header + j*MAX_FIELDS_NAME_LENGTH+MAX_FIELDS_NAME_LENGTH*2+1, 1461 MAX_FIELDS_NAME_LENGTH-1 ); 1462 } 1463 1464 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1465 { 1466 token = strtok ( readouttag[j], ":" ); 1467 1468 //if ( cscompare( buffer, token ) ) 1469 if ( cscompare( token , buffer ) && cscompare( buffer, token ) ) 1470 // This double comparison is required for the field "number of nodes" and all the other fields that start with "number of nodes" (i.g. number of nodes in the mesh"). 1471 // Would be safer to rename "number of nodes" by "number of nodes in the part" so that the name are completely unique. But much more work to do that (Nspre, phParAdapt, etc). 1472 // Since the field name are unique in SyncIO (as it includes part ID), this should be safe and there should be no issue with the "?" trailing character. 1473 { 1474 PhastaIOActiveFiles[i]->read_field_count = j; 1475 FOUND = true; 1476 //printf("buffer: %s | token: %s | j: %d\n",buffer,token,j); 1477 break; 1478 } 1479 } 1480 free(buffer); 1481 1482 if (!FOUND) 1483 { 1484 //if(irank==0) printf("Warning readheader: Not found %s \n",keyphrase); //PhastaIOActiveFiles[i]->myrank is certainly initialized here. 1485 if(PhastaIOActiveFiles[i]->myrank == 0) printf("WARNING readheader: Not found %s\n",keyphrase); 1486 endTimer(&timer_end); 1487 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1488 return; 1489 } 1490 1491 // Find the part we want ... 1492 PhastaIOActiveFiles[i]->read_part_count = PhastaIOActiveFiles[i]->GPid - 1493 PhastaIOActiveFiles[i]->myrank * PhastaIOActiveFiles[i]->nppp - 1; 1494 1495 PhastaIOActiveFiles[i]->my_offset = 1496 PhastaIOActiveFiles[i]->my_read_table[PhastaIOActiveFiles[i]->read_field_count][PhastaIOActiveFiles[i]->read_part_count]; 1497 1498 // printf("****Rank %d offset is %d\n",PhastaIOActiveFiles[i]->myrank,PhastaIOActiveFiles[i]->my_offset); 1499 1500 // Read each datablock header here ... 1501 1502 MPI_File_read_at_all( PhastaIOActiveFiles[i]->file_handle, 1503 PhastaIOActiveFiles[i]->my_offset+1, 1504 read_out_tag, 1505 MAX_FIELDS_NAME_LENGTH-1, 1506 MPI_CHAR, 1507 &read_offset_status ); 1508 token = strtok ( read_out_tag, ":" ); 1509 1510 // printf("&&&&Rank %d read_out_tag is %s\n",PhastaIOActiveFiles[i]->myrank,read_out_tag); 1511 1512 if( cscompare( keyphrase , token ) ) //No need to compare also token with keyphrase like above. We should already have the right one. Otherwise there is a problem. 1513 { 1514 FOUND = true ; 1515 token = strtok( NULL, " ,;<>" ); 1516 skip_size = atoi( token ); 1517 for( j=0; j < *nItems && ( token = strtok( NULL," ,;<>") ); j++ ) 1518 valueListInt[j] = atoi( token ); 1519 1520 if ( j < *nItems ) 1521 { 1522 fprintf( stderr, "Expected # of ints not found for: %s\n", keyphrase ); 1523 } 1524 } 1525 else { 1526 //if(irank==0) 1527 if(PhastaIOActiveFiles[i]->myrank == 0) 1528 // If we enter this if, there is a problem with the name of some fields 1529 { 1530 printf("Error readheader: Unexpected mismatch between keyphrase = %s and token = %s\n",keyphrase,token); 1531 } 1532 } 1533 } 1534 1535 endTimer(&timer_end); 1536 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1537 1538 } 1539 1540 void readdatablock( int* fileDescriptor, 1541 const char keyphrase[], 1542 void* valueArray, 1543 int* nItems, 1544 const char datatype[], 1545 const char iotype[] ) 1546 { 1547 //if(irank == 0) printf("entering readdatablock()\n"); 1548 unsigned long long data_size = 0; 1549 double timer_start, timer_end; 1550 startTimer(&timer_start); 1551 1552 int i = *fileDescriptor; 1553 checkFileDescriptor("readdatablock",&i); 1554 1555 if ( PhastaIONextActiveIndex == 0 ) { 1556 int filePtr = *fileDescriptor - 1; 1557 FILE* fileObject; 1558 char junk; 1559 1560 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1561 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1562 fprintf(stderr,"openfile_ function has to be called before\n") ; 1563 fprintf(stderr,"acessing the file\n ") ; 1564 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1565 endTimer(&timer_end); 1566 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1567 return; 1568 } 1569 1570 // error check.. 1571 // since we require that a consistant header always preceed the data block 1572 // let us check to see that it is actually the case. 1573 1574 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1575 fprintf(stderr, "Header not consistant with data block\n"); 1576 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1577 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1578 fprintf(stderr, "Please recheck read sequence \n"); 1579 if( Strict_Error ) { 1580 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1581 endTimer(&timer_end); 1582 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1583 return; 1584 } 1585 } 1586 1587 if ( LastHeaderNotFound ) { 1588 endTimer(&timer_end); 1589 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1590 return; 1591 } 1592 fileObject = fileArray[ filePtr ]; 1593 Wrong_Endian = byte_order[ filePtr ]; 1594 1595 size_t type_size = typeSize( datatype ); 1596 int nUnits = *nItems; 1597 isBinary( iotype ); 1598 1599 free(LastHeaderKey[filePtr]); 1600 LastHeaderKey.erase(filePtr); 1601 1602 if ( binary_format ) { 1603 fread( valueArray, type_size, nUnits, fileObject ); 1604 fread( &junk, sizeof(char), 1 , fileObject ); 1605 if ( Wrong_Endian ) SwapArrayByteOrder( valueArray, type_size, nUnits ); 1606 } else { 1607 1608 char* ts1 = StringStripper( datatype ); 1609 if ( cscompare( "integer", ts1 ) ) { 1610 for( int n=0; n < nUnits ; n++ ) 1611 fscanf(fileObject, "%d\n",(int*)((int*)valueArray+n) ); 1612 } else if ( cscompare( "double", ts1 ) ) { 1613 for( int n=0; n < nUnits ; n++ ) 1614 fscanf(fileObject, "%lf\n",(double*)((double*)valueArray+n) ); 1615 } 1616 free (ts1); 1617 } 1618 1619 //return; 1620 } 1621 else { 1622 // printf("read data block\n"); 1623 MPI_Status read_data_status; 1624 size_t type_size = typeSize( datatype ); 1625 int nUnits = *nItems; 1626 isBinary( iotype ); 1627 1628 // read datablock then 1629 //MR CHANGE 1630 // if ( cscompare ( datatype, "double")) 1631 char* ts2 = StringStripper( datatype ); 1632 if ( cscompare ( "double" , ts2)) 1633 //MR CHANGE END 1634 { 1635 1636 MPI_File_read_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 1637 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1638 valueArray, 1639 nUnits, 1640 MPI_DOUBLE ); 1641 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1642 valueArray, 1643 &read_data_status ); 1644 data_size=8*nUnits; 1645 1646 } 1647 //MR CHANGE 1648 // else if ( cscompare ( datatype, "integer")) 1649 else if ( cscompare ( "integer" , ts2)) 1650 //MR CHANGE END 1651 { 1652 MPI_File_read_at_all_begin(PhastaIOActiveFiles[i]->file_handle, 1653 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1654 valueArray, 1655 nUnits, 1656 MPI_INT ); 1657 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1658 valueArray, 1659 &read_data_status ); 1660 data_size=4*nUnits; 1661 } 1662 else 1663 { 1664 *fileDescriptor = DATA_TYPE_ILLEGAL; 1665 printf("readdatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 1666 endTimer(&timer_end); 1667 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1668 return; 1669 } 1670 free(ts2); 1671 1672 1673 // printf("%d Read finishe\n",PhastaIOActiveFiles[i]->myrank); 1674 1675 // Swap data byte order if endianess is different ... 1676 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) 1677 { 1678 SwapArrayByteOrder( valueArray, type_size, nUnits ); 1679 } 1680 } 1681 1682 endTimer(&timer_end); 1683 char extra_msg[1024]; 1684 memset(extra_msg, '\0', 1024); 1685 char* key = StringStripper(keyphrase); 1686 sprintf(extra_msg, " field is %s ", key); 1687 printPerf("readdatablock", timer_start, timer_end, data_size, 1, extra_msg); 1688 free(key); 1689 1690 } 1691 1692 void writeheader( const int* fileDescriptor, 1693 const char keyphrase[], 1694 const void* valueArray, 1695 const int* nItems, 1696 const int* ndataItems, 1697 const char datatype[], 1698 const char iotype[]) 1699 { 1700 1701 //if(irank == 0) printf("entering writeheader()\n"); 1702 1703 double timer_start, timer_end; 1704 startTimer(&timer_start); 1705 1706 int i = *fileDescriptor; 1707 checkFileDescriptor("writeheader",&i); 1708 1709 if ( PhastaIONextActiveIndex == 0 ) { 1710 int filePtr = *fileDescriptor - 1; 1711 FILE* fileObject; 1712 1713 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1714 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1715 fprintf(stderr,"openfile_ function has to be called before \n") ; 1716 fprintf(stderr,"acessing the file\n ") ; 1717 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1718 endTimer(&timer_end); 1719 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1720 return; 1721 } 1722 1723 if( LastHeaderKey.count(filePtr) ) 1724 free(LastHeaderKey[filePtr]); 1725 const int l = strlen(keyphrase)+1; 1726 LastHeaderKey[filePtr] = (char*) malloc(l*sizeof(char)); 1727 strcpy(LastHeaderKey[filePtr], keyphrase); 1728 DataSize = *ndataItems; 1729 fileObject = fileArray[ filePtr ] ; 1730 size_t type_size = typeSize( datatype ); 1731 isBinary( iotype ); 1732 header_type[ filePtr ] = type_size; 1733 1734 int _newline = ( *ndataItems > 0 ) ? sizeof( char ) : 0; 1735 int size_of_nextblock = 1736 ( binary_format ) ? type_size*( *ndataItems )+ _newline : *ndataItems ; 1737 1738 fprintf( fileObject, "%s : < %d > ", keyphrase, size_of_nextblock ); 1739 for( int i = 0; i < *nItems; i++ ) 1740 fprintf(fileObject, "%d ", *((int*)((int*)valueArray+i))); 1741 fprintf(fileObject, "\n"); 1742 1743 //return ; 1744 } 1745 else { // else it's parallel I/O 1746 DataSize = *ndataItems; 1747 size_t type_size = typeSize( datatype ); 1748 isBinary( iotype ); 1749 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1750 1751 int string_length = strlen( keyphrase ); 1752 char* buffer = (char*) malloc ( string_length+1 ); 1753 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1754 //mem_address = (long long )buffer; 1755 //if( mem_address & (pool_align -1) ) 1756 // buffer += pool_align - (mem_address & (pool_align -1)); 1757 1758 strcpy ( buffer, keyphrase); 1759 buffer[ string_length ] = '\0'; 1760 1761 char* st2 = strtok ( buffer, "@" ); 1762 st2 = strtok (NULL, "@"); 1763 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1764 1765 if ( char* p = strpbrk(buffer, "@") ) 1766 *p = '\0'; 1767 1768 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1769 sprintf(mpi_tag, "\n%s : %d\n", buffer, PhastaIOActiveFiles[i]->field_count); 1770 unsigned long long offset_value; 1771 1772 int temp = *ndataItems; 1773 unsigned long long number_of_items = (unsigned long long)temp; 1774 MPI_Barrier(PhastaIOActiveFiles[i]->local_comm); 1775 1776 MPI_Scan( &number_of_items, 1777 &offset_value, 1778 1, 1779 MPI_LONG_LONG_INT, 1780 MPI_SUM, 1781 PhastaIOActiveFiles[i]->local_comm ); 1782 1783 offset_value = (offset_value - number_of_items) * type_size; 1784 1785 offset_value += PhastaIOActiveFiles[i]->local_myrank * 1786 DB_HEADER_SIZE + 1787 PhastaIOActiveFiles[i]->next_start_address; 1788 // This offset is the starting address of each datablock header... 1789 PhastaIOActiveFiles[i]->my_offset = offset_value; 1790 1791 // Write in my offset table ... 1792 PhastaIOActiveFiles[i]->my_offset_table[PhastaIOActiveFiles[i]->field_count][PhastaIOActiveFiles[i]->part_count] = 1793 PhastaIOActiveFiles[i]->my_offset; 1794 1795 // Update the next-start-address ... 1796 PhastaIOActiveFiles[i]->next_start_address = offset_value + 1797 number_of_items * type_size + 1798 DB_HEADER_SIZE; 1799 MPI_Bcast( &(PhastaIOActiveFiles[i]->next_start_address), 1800 1, 1801 MPI_LONG_LONG_INT, 1802 PhastaIOActiveFiles[i]->local_numprocs-1, 1803 PhastaIOActiveFiles[i]->local_comm ); 1804 1805 // Prepare datablock header ... 1806 int _newline = (*ndataItems>0)?sizeof(char):0; 1807 unsigned int size_of_nextblock = type_size * (*ndataItems) + _newline; 1808 1809 //char datablock_header[255]; 1810 //bzero((void*)datablock_header,255); 1811 char datablock_header[DB_HEADER_SIZE]; 1812 bzero((void*)datablock_header,DB_HEADER_SIZE); 1813 1814 PhastaIOActiveFiles[i]->GPid = PhastaIOActiveFiles[i]->nppp*PhastaIOActiveFiles[i]->myrank+PhastaIOActiveFiles[i]->part_count; 1815 sprintf( datablock_header, 1816 "\n%s : < %u >", 1817 keyphrase, 1818 size_of_nextblock ); 1819 1820 for ( int j = 0; j < *nItems; j++ ) 1821 { 1822 sprintf( datablock_header, 1823 "%s %d ", 1824 datablock_header, 1825 *((int*)((int*)valueArray+j))); 1826 } 1827 sprintf( datablock_header, 1828 "%s\n ", 1829 datablock_header ); 1830 1831 // Write datablock header ... 1832 //MR CHANGE 1833 // if ( cscompare(datatype,"double") ) 1834 char* ts1 = StringStripper( datatype ); 1835 if ( cscompare("double",ts1) ) 1836 //MR CHANGE END 1837 { 1838 free ( PhastaIOActiveFiles[i]->double_chunk ); 1839 PhastaIOActiveFiles[i]->double_chunk = ( double * )malloc( (sizeof( double )*number_of_items+ DB_HEADER_SIZE)); 1840 //PhastaIOActiveFiles[i]->double_chunk = ( double * ) malloc( sizeof( double )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1841 //mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 1842 //if( mem_address & (pool_align -1) ) 1843 // PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 1844 1845 double * aa = ( double * )datablock_header; 1846 memcpy(PhastaIOActiveFiles[i]->double_chunk, aa, DB_HEADER_SIZE); 1847 } 1848 //MR CHANGE 1849 // if ( cscompare(datatype,"integer") ) 1850 else if ( cscompare("integer",ts1) ) 1851 //MR CHANGE END 1852 { 1853 free ( PhastaIOActiveFiles[i]->int_chunk ); 1854 PhastaIOActiveFiles[i]->int_chunk = ( int * )malloc( (sizeof( int )*number_of_items+ DB_HEADER_SIZE)); 1855 //PhastaIOActiveFiles[i]->int_chunk = ( int * ) malloc( sizeof( int )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1856 //mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 1857 //if( mem_address & (pool_align -1) ) 1858 // PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & ( pool_align -1)); 1859 1860 int * aa = ( int * )datablock_header; 1861 memcpy(PhastaIOActiveFiles[i]->int_chunk, aa, DB_HEADER_SIZE); 1862 } 1863 else { 1864 // *fileDescriptor = DATA_TYPE_ILLEGAL; 1865 printf("writeheader - DATA_TYPE_ILLEGAL - %s\n",datatype); 1866 endTimer(&timer_end); 1867 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1868 return; 1869 } 1870 free(ts1); 1871 1872 PhastaIOActiveFiles[i]->part_count++; 1873 if ( PhastaIOActiveFiles[i]->part_count == PhastaIOActiveFiles[i]->nppp ) { 1874 //A new field will be written 1875 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1876 memcpy( PhastaIOActiveFiles[i]->master_header + 1877 PhastaIOActiveFiles[i]->field_count * 1878 MAX_FIELDS_NAME_LENGTH + 1879 MAX_FIELDS_NAME_LENGTH * 2, 1880 mpi_tag, 1881 MAX_FIELDS_NAME_LENGTH-1); 1882 } 1883 PhastaIOActiveFiles[i]->field_count++; 1884 PhastaIOActiveFiles[i]->part_count=0; 1885 } 1886 free(buffer); 1887 } 1888 1889 endTimer(&timer_end); 1890 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1891 } 1892 1893 void writedatablock( const int* fileDescriptor, 1894 const char keyphrase[], 1895 const void* valueArray, 1896 const int* nItems, 1897 const char datatype[], 1898 const char iotype[] ) 1899 { 1900 //if(irank == 0) printf("entering writedatablock()\n"); 1901 1902 unsigned long long data_size = 0; 1903 double timer_start, timer_end; 1904 startTimer(&timer_start); 1905 1906 int i = *fileDescriptor; 1907 checkFileDescriptor("writedatablock",&i); 1908 1909 if ( PhastaIONextActiveIndex == 0 ) { 1910 int filePtr = *fileDescriptor - 1; 1911 1912 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1913 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1914 fprintf(stderr,"openfile_ function has to be called before \n") ; 1915 fprintf(stderr,"acessing the file\n ") ; 1916 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1917 endTimer(&timer_end); 1918 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1919 return; 1920 } 1921 // since we require that a consistant header always preceed the data block 1922 // let us check to see that it is actually the case. 1923 1924 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1925 fprintf(stderr, "Header not consistant with data block\n"); 1926 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1927 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1928 fprintf(stderr, "Please recheck write sequence \n"); 1929 if( Strict_Error ) { 1930 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1931 endTimer(&timer_end); 1932 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1933 return; 1934 } 1935 } 1936 1937 FILE* fileObject = fileArray[ filePtr ] ; 1938 size_t type_size=typeSize( datatype ); 1939 isBinary( iotype ); 1940 1941 free(LastHeaderKey[filePtr]); 1942 LastHeaderKey.erase(filePtr); 1943 1944 if ( header_type[filePtr] != (int)type_size ) { 1945 fprintf(stderr,"header and datablock differ on typeof data in the block for\n"); 1946 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1947 if( Strict_Error ) { 1948 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1949 endTimer(&timer_end); 1950 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1951 return; 1952 } 1953 } 1954 1955 int nUnits = *nItems; 1956 1957 if ( nUnits != DataSize ) { 1958 fprintf(stderr,"header and datablock differ on number of data items for\n"); 1959 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1960 if( Strict_Error ) { 1961 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1962 endTimer(&timer_end); 1963 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1964 return; 1965 } 1966 } 1967 1968 if ( binary_format ) { 1969 1970 fwrite( valueArray, type_size, nUnits, fileObject ); 1971 fprintf( fileObject,"\n"); 1972 1973 } else { 1974 1975 char* ts1 = StringStripper( datatype ); 1976 if ( cscompare( "integer", ts1 ) ) { 1977 for( int n=0; n < nUnits ; n++ ) 1978 fprintf(fileObject,"%d\n",*((int*)((int*)valueArray+n))); 1979 } else if ( cscompare( "double", ts1 ) ) { 1980 for( int n=0; n < nUnits ; n++ ) 1981 fprintf(fileObject,"%lf\n",*((double*)((double*)valueArray+n))); 1982 } 1983 free (ts1); 1984 } 1985 //return ; 1986 } 1987 else { // syncIO case 1988 MPI_Status write_data_status; 1989 isBinary( iotype ); 1990 int nUnits = *nItems; 1991 1992 //MR CHANGE 1993 // if ( cscompare(datatype,"double") ) 1994 char* ts1 = StringStripper( datatype ); 1995 if ( cscompare("double",ts1) ) 1996 //MR CHANGE END 1997 { 1998 memcpy((PhastaIOActiveFiles[i]->double_chunk+DB_HEADER_SIZE/sizeof(double)), valueArray, nUnits*sizeof(double)); 1999 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 2000 PhastaIOActiveFiles[i]->my_offset, 2001 PhastaIOActiveFiles[i]->double_chunk, 2002 //BLOCK_SIZE/sizeof(double), 2003 nUnits+DB_HEADER_SIZE/sizeof(double), 2004 MPI_DOUBLE ); 2005 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 2006 PhastaIOActiveFiles[i]->double_chunk, 2007 &write_data_status ); 2008 data_size=8*nUnits; 2009 } 2010 //MR CHANGE 2011 // else if ( cscompare ( datatype, "integer")) 2012 else if ( cscompare("integer",ts1) ) 2013 //MR CHANGE END 2014 { 2015 memcpy((PhastaIOActiveFiles[i]->int_chunk+DB_HEADER_SIZE/sizeof(int)), valueArray, nUnits*sizeof(int)); 2016 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 2017 PhastaIOActiveFiles[i]->my_offset, 2018 PhastaIOActiveFiles[i]->int_chunk, 2019 nUnits+DB_HEADER_SIZE/sizeof(int), 2020 MPI_INT ); 2021 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 2022 PhastaIOActiveFiles[i]->int_chunk, 2023 &write_data_status ); 2024 data_size=4*nUnits; 2025 } 2026 else { 2027 printf("Error: writedatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 2028 endTimer(&timer_end); 2029 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 2030 return; 2031 } 2032 free(ts1); 2033 } 2034 2035 endTimer(&timer_end); 2036 char extra_msg[1024]; 2037 memset(extra_msg, '\0', 1024); 2038 char* key = StringStripper(keyphrase); 2039 sprintf(extra_msg, " field is %s ", key); 2040 printPerf("writedatablock", timer_start, timer_end, data_size, 1, extra_msg); 2041 free(key); 2042 2043 } 2044 2045 void 2046 SwapArrayByteOrder( void* array, 2047 int nbytes, 2048 int nItems ) 2049 { 2050 /* This swaps the byte order for the array of nItems each 2051 of size nbytes , This will be called only locally */ 2052 int i,j; 2053 unsigned char* ucDst = (unsigned char*)array; 2054 2055 for(i=0; i < nItems; i++) { 2056 for(j=0; j < (nbytes/2); j++) 2057 std::swap( ucDst[j] , ucDst[(nbytes - 1) - j] ); 2058 ucDst += nbytes; 2059 } 2060 } 2061 2062 void 2063 writestring( int* fileDescriptor, 2064 const char inString[] ) 2065 { 2066 2067 int filePtr = *fileDescriptor - 1; 2068 FILE* fileObject = fileArray[filePtr] ; 2069 fprintf(fileObject,"%s",inString ); 2070 return; 2071 } 2072 2073 void 2074 Gather_Headers( int* fileDescriptor, 2075 vector< string >& headers ) 2076 { 2077 2078 FILE* fileObject; 2079 char Line[1024]; 2080 2081 fileObject = fileArray[ (*fileDescriptor)-1 ]; 2082 2083 while( !feof(fileObject) ) { 2084 fgets( Line, 1024, fileObject); 2085 if ( Line[0] == '#' ) { 2086 headers.push_back( Line ); 2087 } else { 2088 break; 2089 } 2090 } 2091 rewind( fileObject ); 2092 clearerr( fileObject ); 2093 } 2094 void 2095 isWrong( void ) { (Wrong_Endian) ? fprintf(stdout,"YES\n"): fprintf(stdout,"NO\n") ; } 2096 2097 void 2098 togglestrictmode( void ) { Strict_Error = !Strict_Error; } 2099 2100 int 2101 isLittleEndian( void ) 2102 { 2103 // this function returns a 1 if the current running architecture is 2104 // LittleEndian Byte Ordered, else it returns a zero 2105 2106 union { 2107 long a; 2108 char c[sizeof( long )]; 2109 } endianUnion; 2110 2111 endianUnion.a = 1 ; 2112 2113 if ( endianUnion.c[sizeof(long)-1] != 1 ) return 1 ; 2114 else return 0; 2115 } 2116 2117 namespace PHASTA { 2118 const char* const PhastaIO_traits<int>::type_string = "integer"; 2119 const char* const PhastaIO_traits<double>::type_string = "double"; 2120 } 2121