1 /* 2 * 3 * This is the SyncIO library that uses MPI-IO collective functions to 4 * implement a flexible I/O checkpoint solution for a large number of 5 * processors. 6 * 7 * Previous developer: Ning Liu (liun2@cs.rpi.edu) 8 * Jing Fu (fuj@cs.rpi.edu) 9 * Current developers: Michel Rasquin (Michel.Rasquin@colorado.edu), 10 * Ben Matthews (benjamin.a.matthews@colorado.edu) 11 * 12 */ 13 14 #include <map> 15 #include <vector> 16 #include <string> 17 #include <string.h> 18 #include <ctype.h> 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <math.h> 22 #include "phastaIO.h" 23 #include "mpi.h" 24 #include "phiotmrc.h" 25 26 #define VERSION_INFO_HEADER_SIZE 8192 27 #define DB_HEADER_SIZE 1024 28 #define ONE_MEGABYTE 1048576 29 #define TWO_MEGABYTE 2097152 30 #define ENDIAN_TEST_NUMBER 12180 // Troy's Zip Code!! 31 #define MAX_PHASTA_FILES 64 32 #define MAX_PHASTA_FILE_NAME_LENGTH 1024 33 #define MAX_FIELDS_NUMBER ((VERSION_INFO_HEADER_SIZE/MAX_FIELDS_NAME_LENGTH)-4) // The meta data include - MPI_IO_Tag, nFields, nFields*names of the fields, nppf 34 // -3 for MPI_IO_Tag, nFields and nppf, -4 for extra security (former nFiles) 35 #define MAX_FIELDS_NAME_LENGTH 128 36 #define DefaultMHSize (4*ONE_MEGABYTE) 37 //#define DefaultMHSize (8350) //For test 38 #define LATEST_WRITE_VERSION 1 39 #define inv1024sq 953.674316406e-9 // = 1/1024/1024 40 int MasterHeaderSize = -1; 41 42 bool PRINT_PERF = false; // default print no perf results 43 int irank = -1; // global rank, should never be manually manipulated 44 int mysize = -1; 45 46 // Static variables are bad but used here to store the subcommunicator and associated variables 47 // Prevent MPI_Comm_split to be called more than once, especially on BGQ with the V1R2M1 driver (leak detected in MPI_Comm_split - IBM working on it) 48 static int s_assign_local_comm = 0; 49 static MPI_Comm s_local_comm; 50 static int s_local_size = -1; 51 static int s_local_rank = -1; 52 53 //unsigned long long pool_align = 8; 54 //unsigned long long mem_address; 55 56 // the following defines are for debug printf 57 #define PHASTAIO_PREFIX "phastaIO debug: " 58 #define PHASTAIO_DEBUG 0 //default to not print any debugging info 59 60 #if PHASTAIO_DEBUG 61 #define phprintf( s, arg...) printf(PHASTAIO_PREFIX s "\n", ##arg) 62 #define phprintf_0( s, arg...) do{ \ 63 MPI_Comm_rank(MPI_COMM_WORLD, &irank); \ 64 if(irank == 0){ \ 65 printf(PHASTAIO_PREFIX "irank=0: " s "\n", ##arg); \ 66 } \ 67 } while(0) 68 #else 69 #define phprintf( s, arg...) 70 #define phprintf_0( s, arg...) 71 #endif 72 73 enum PhastaIO_Errors 74 { 75 MAX_PHASTA_FILES_EXCEEDED = -1, 76 UNABLE_TO_OPEN_FILE = -2, 77 NOT_A_MPI_FILE = -3, 78 GPID_EXCEEDED = -4, 79 DATA_TYPE_ILLEGAL = -5, 80 }; 81 82 using namespace std; 83 84 namespace{ 85 86 map< int , char* > LastHeaderKey; 87 vector< FILE* > fileArray; 88 vector< bool > byte_order; 89 vector< int > header_type; 90 int DataSize=0; 91 bool LastHeaderNotFound = false; 92 bool Wrong_Endian = false ; 93 bool Strict_Error = false ; 94 bool binary_format = true; 95 96 /***********************************************************************/ 97 /***************** NEW PHASTA IO CODE STARTS HERE **********************/ 98 /***********************************************************************/ 99 100 typedef struct 101 { 102 char filename[MAX_PHASTA_FILE_NAME_LENGTH]; /* defafults to 1024 */ 103 unsigned long long my_offset; 104 unsigned long long next_start_address; 105 unsigned long long **my_offset_table; 106 unsigned long long **my_read_table; 107 108 double * double_chunk; 109 double * read_double_chunk; 110 111 int field_count; 112 int part_count; 113 int read_field_count; 114 int read_part_count; 115 int GPid; 116 int start_id; 117 118 int mhsize; 119 120 int myrank; 121 int numprocs; 122 int local_myrank; 123 int local_numprocs; 124 125 int nppp; 126 int nPPF; 127 int nFiles; 128 int nFields; 129 130 int * int_chunk; 131 int * read_int_chunk; 132 133 int Wrong_Endian; /* default to false */ 134 char * master_header; 135 MPI_File file_handle; 136 MPI_Comm local_comm; 137 } phastaio_file_t; 138 139 typedef struct 140 { 141 //unsigned long long my_offset; 142 //unsigned long long **offset_table; 143 //int fileID; 144 int nppf, nfields; 145 //int GPid; 146 //int read_field_count; 147 char * masterHeader; 148 }serial_file; 149 150 serial_file *SerialFile; 151 phastaio_file_t *PhastaIOActiveFiles[MAX_PHASTA_FILES]; 152 int PhastaIONextActiveIndex = 0; /* indicates next index to allocate */ 153 154 // the caller has the responsibility to delete the returned string 155 // TODO: StringStipper("nbc value? ") returns NULL? 156 char* 157 StringStripper( const char istring[] ) { 158 159 int length = strlen( istring ); 160 161 //char* dest = new char [ length + 1 ]; 162 char* dest = (char *)malloc( length + 1 ); 163 164 //char * dest; 165 //dest = (char *)malloc( length + 1 + pool_align ); 166 //if (dest & 0x0F != 0) printf("not aligned!!!!\n"); 167 //mem_address = (long long )dest; 168 //if( mem_address & (pool_align -1) ) 169 // dest += pool_align - (mem_address & (pool_align -1)); 170 171 strcpy( dest, istring ); 172 dest[ length ] = '\0'; 173 174 if ( char* p = strpbrk( dest, " ") ) 175 *p = '\0'; 176 177 return dest; 178 } 179 180 inline int 181 cscompare( const char teststring[], 182 const char targetstring[] ) { 183 184 char* s1 = const_cast<char*>(teststring); 185 char* s2 = const_cast<char*>(targetstring); 186 187 while( *s1 == ' ') s1++; 188 while( *s2 == ' ') s2++; 189 while( ( *s1 ) 190 && ( *s2 ) 191 && ( *s2 != '?') 192 && ( tolower( *s1 )==tolower( *s2 ) ) ) { 193 s1++; 194 s2++; 195 while( *s1 == ' ') s1++; 196 while( *s2 == ' ') s2++; 197 } 198 if ( !( *s1 ) || ( *s1 == '?') ) return 1; 199 else return 0; 200 } 201 202 inline void 203 isBinary( const char iotype[] ) { 204 205 char* fname = StringStripper( iotype ); 206 if ( cscompare( fname, "binary" ) ) binary_format = true; 207 else binary_format = false; 208 free (fname); 209 210 } 211 212 inline size_t 213 typeSize( const char typestring[] ) { 214 215 char* ts1 = StringStripper( typestring ); 216 217 if ( cscompare( "integer", ts1 ) ) { 218 free (ts1); 219 return sizeof(int); 220 } else if ( cscompare( "double", ts1 ) ) { 221 free (ts1); 222 return sizeof( double ); 223 } else { 224 free (ts1); 225 fprintf(stderr,"unknown type : %s\n",ts1); 226 return 0; 227 } 228 } 229 230 int 231 readHeader( FILE* fileObject, 232 const char phrase[], 233 int* params, 234 int expect ) { 235 236 char* text_header; 237 char* token; 238 char Line[1024]; 239 char junk; 240 bool FOUND = false ; 241 int real_length; 242 int skip_size, integer_value; 243 int rewind_count=0; 244 245 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 246 rewind( fileObject ); 247 clearerr( fileObject ); 248 rewind_count++; 249 fgets( Line, 1024, fileObject ); 250 } 251 252 while( !FOUND && ( rewind_count < 2 ) ) { 253 if ( ( Line[0] != '\n' ) && ( real_length = strcspn( Line, "#" )) ){ 254 //text_header = new char [ real_length + 1 ]; 255 text_header = (char *)malloc( real_length + 1 ); 256 //text_header = (char *)malloc( real_length + 1 + pool_align ); 257 //mem_address = (long long )text_header; 258 //if( mem_address & (pool_align -1) ) 259 // text_header += pool_align - (mem_address & (pool_align -1)); 260 261 strncpy( text_header, Line, real_length ); 262 text_header[ real_length ] =static_cast<char>(NULL); 263 token = strtok ( text_header, ":" ); 264 //if( cscompare( phrase , token ) ) { 265 // Double comparison required because different fields can still start 266 // with the same name for mixed meshes (nbc code, nbc values, etc). 267 // Would be easy to fix cscompare instead but would it break sth else? 268 if( cscompare( phrase , token ) && cscompare( token , phrase ) ) { 269 FOUND = true ; 270 token = strtok( NULL, " ,;<>" ); 271 skip_size = atoi( token ); 272 int i; 273 for( i=0; i < expect && ( token = strtok( NULL," ,;<>") ); i++) { 274 params[i] = atoi( token ); 275 } 276 if ( i < expect ) { 277 fprintf(stderr,"Aloha Expected # of ints not found for: %s\n",phrase ); 278 } 279 } else if ( cscompare(token,"byteorder magic number") ) { 280 if ( binary_format ) { 281 fread((void*)&integer_value,sizeof(int),1,fileObject); 282 fread( &junk, sizeof(char), 1 , fileObject ); 283 if ( 362436 != integer_value ) Wrong_Endian = true; 284 } else{ 285 fscanf(fileObject, "%d\n", &integer_value ); 286 } 287 } else { 288 /* some other header, so just skip over */ 289 token = strtok( NULL, " ,;<>" ); 290 skip_size = atoi( token ); 291 if ( binary_format) 292 fseek( fileObject, skip_size, SEEK_CUR ); 293 else 294 for( int gama=0; gama < skip_size; gama++ ) 295 fgets( Line, 1024, fileObject ); 296 } 297 free (text_header); 298 } // end of if before while loop 299 300 if ( !FOUND ) 301 if( !fgets( Line, 1024, fileObject ) && feof( fileObject ) ) { 302 rewind( fileObject ); 303 clearerr( fileObject ); 304 rewind_count++; 305 fgets( Line, 1024, fileObject ); 306 } 307 } 308 309 if ( !FOUND ) { 310 //fprintf(stderr, "Error: Could not find: %s\n", phrase); 311 if(irank == 0) printf("WARNING: Could not find: %s\n", phrase); 312 return 1; 313 } 314 return 0; 315 } 316 317 } // end unnamed namespace 318 319 320 // begin of publicly visible functions 321 322 /** 323 * This function takes a long long pointer and assign (start) phiotmrc value to it 324 */ 325 //void startTimer(unsigned long long* start) { 326 void startTimer(double* start) { 327 328 if( !PRINT_PERF ) return; 329 330 MPI_Barrier(MPI_COMM_WORLD); 331 *start = phiotmrc(); 332 } 333 334 /** 335 * This function takes a long long pointer and assign (end) phiotmrc value to it 336 */ 337 void endTimer(double* end) { 338 339 if( !PRINT_PERF ) return; 340 341 *end = phiotmrc(); 342 MPI_Barrier(MPI_COMM_WORLD); 343 } 344 345 /** 346 * choose to print some performance results (or not) according to 347 * the PRINT_PERF macro 348 */ 349 void printPerf( 350 const char* func_name, 351 double start, 352 double end, 353 unsigned long long datasize, 354 int printdatainfo, 355 const char* extra_msg) { 356 357 if( !PRINT_PERF ) return; 358 359 unsigned long long data_size = datasize; 360 361 double time = end - start; 362 363 unsigned long long isizemin,isizemax,isizetot; 364 double sizemin,sizemax,sizeavg,sizetot,rate; 365 double tmin, tmax, tavg, ttot; 366 367 MPI_Allreduce(&time, &tmin,1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 368 MPI_Allreduce(&time, &tmax,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 369 MPI_Allreduce(&time, &ttot,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 370 tavg = ttot/mysize; 371 372 if(irank == 0) { 373 if ( PhastaIONextActiveIndex == 0 ) printf("** 1PFPP "); 374 else printf("** syncIO "); 375 printf("%s(): Tmax = %f sec, Tmin = %f sec, Tavg = %f sec", func_name, tmax, tmin, tavg); 376 } 377 378 if(printdatainfo == 1) { // if printdatainfo ==1, compute I/O rate and block size 379 MPI_Allreduce(&data_size,&isizemin,1,MPI_LONG_LONG_INT,MPI_MIN,MPI_COMM_WORLD); 380 MPI_Allreduce(&data_size,&isizemax,1,MPI_LONG_LONG_INT,MPI_MAX,MPI_COMM_WORLD); 381 MPI_Allreduce(&data_size,&isizetot,1,MPI_LONG_LONG_INT,MPI_SUM,MPI_COMM_WORLD); 382 383 sizemin=(double)(isizemin*inv1024sq); 384 sizemax=(double)(isizemax*inv1024sq); 385 sizetot=(double)(isizetot*inv1024sq); 386 sizeavg=(double)(1.0*sizetot/mysize); 387 rate=(double)(1.0*sizetot/tmax); 388 389 if( irank == 0) { 390 printf(", Rate = %f MB/s [%s] \n \t\t\t block size: Min= %f MB; Max= %f MB; Avg= %f MB; Tot= %f MB\n", rate, extra_msg, sizemin,sizemax,sizeavg,sizetot); 391 } 392 } 393 else { 394 if(irank == 0) { 395 printf(" \n"); 396 //printf(" (%s) \n", extra_msg); 397 } 398 } 399 } 400 401 /** 402 * This function is normally called at the beginning of a read operation, before 403 * init function. 404 * This function (uses rank 0) reads out nfields, nppf, master header size, 405 * endianess and allocates for masterHeader string. 406 * These values are essential for following read operations. Rank 0 will bcast 407 * these values to other ranks in the commm world 408 * 409 * If the file set is of old POSIX format, it would throw error and exit 410 */ 411 void queryphmpiio(const char filename[],int *nfields, int *nppf) 412 { 413 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 414 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 415 416 if(irank == 0) { 417 FILE * fileHandle; 418 char* fname = StringStripper( filename ); 419 420 fileHandle = fopen (fname,"rb"); 421 if (fileHandle == NULL ) { 422 printf("\nError: File %s doesn't exist! Please check!\n",fname); 423 } 424 else { 425 SerialFile =(serial_file *)calloc( 1, sizeof( serial_file) ); 426 //SerialFile = (serial_file *)malloc( sizeof( serial_file ) + pool_align ); 427 //mem_address = (long long )SerialFile; 428 //if( mem_address & (pool_align -1) ) 429 // SerialFile += pool_align - (mem_address & (pool_align -1)); 430 431 //SerialFile->masterHeader = (char *)malloc(MasterHeaderSize); 432 //int meta_size_limit = 4*ONE_MEGABYTE; 433 //int meta_size_limit = (4+ MAX_FIELDS_NUMBER ) * MAX_FIELDS_NAME_LENGTH; 434 int meta_size_limit = VERSION_INFO_HEADER_SIZE; 435 SerialFile->masterHeader = (char *)malloc( meta_size_limit ); 436 //SerialFile->masterHeader = (char *)malloc( meta_size_limit + pool_align ); 437 //mem_address = (long long )SerialFile; 438 //if( mem_address & (pool_align -1) ) 439 // SerialFile->masterHeader += pool_align - (mem_address & (pool_align -1)); 440 441 fread(SerialFile->masterHeader, 1, meta_size_limit, fileHandle); 442 443 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 444 char version[MAX_FIELDS_NAME_LENGTH/4]; 445 int mhsize; 446 char * token; 447 int magic_number; 448 449 memcpy( read_out_tag, 450 SerialFile->masterHeader, 451 MAX_FIELDS_NAME_LENGTH-1 ); 452 453 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) { 454 // Test endianess ... 455 memcpy (&magic_number, 456 SerialFile->masterHeader + sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 457 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 458 459 if ( magic_number != ENDIAN_TEST_NUMBER ) { 460 printf("Endian is different!\n"); 461 // Will do swap later 462 } 463 464 // test version, old version, default masterheader size is 4M 465 // newer version masterheader size is read from first line 466 memcpy(version, 467 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/2, 468 MAX_FIELDS_NAME_LENGTH/4 - 1); //TODO: why -1? 469 470 if( cscompare ("version",version) ) { 471 // if there is "version" tag in the file, then it is newer format 472 // read master header size from here, otherwise use default 473 // Note: if version is "1", we know mhsize is at 3/4 place... 474 475 token = strtok(version, ":"); 476 token = strtok(NULL, " ,;<>" ); 477 int iversion = atoi(token); 478 479 if( iversion == 1) { 480 memcpy( &mhsize, 481 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH/4*3 + sizeof("mhsize : ")-1, 482 sizeof(int)); 483 if ( magic_number != ENDIAN_TEST_NUMBER ) 484 SwapArrayByteOrder(&mhsize, sizeof(int), 1); 485 486 if( mhsize > DefaultMHSize ) { 487 //if actual headersize is larger than default, let's re-read 488 free(SerialFile->masterHeader); 489 SerialFile->masterHeader = (char *)malloc(mhsize); 490 fseek(fileHandle, 0, SEEK_SET); // reset the file stream position 491 fread(SerialFile->masterHeader,1,mhsize,fileHandle); 492 } 493 } 494 //TODO: check if this is a valid int?? 495 MasterHeaderSize = mhsize; 496 } 497 else { // else it's version 0's format w/o version tag, implicating MHSize=4M 498 MasterHeaderSize = DefaultMHSize; 499 } 500 501 memcpy( read_out_tag, 502 SerialFile->masterHeader+MAX_FIELDS_NAME_LENGTH+1, 503 MAX_FIELDS_NAME_LENGTH ); //TODO: why +1 504 505 // Read in # fields ... 506 token = strtok( read_out_tag, ":" ); 507 token = strtok( NULL," ,;<>" ); 508 *nfields = atoi( token ); 509 if ( *nfields > MAX_FIELDS_NUMBER) { 510 printf("Error queryphmpiio: nfields is larger than MAX_FIELDS_NUMBER!\n"); 511 } 512 SerialFile->nfields=*nfields; //TODO: sanity check of this int? 513 514 memcpy( read_out_tag, 515 SerialFile->masterHeader + MAX_FIELDS_NAME_LENGTH * 2 516 + *nfields * MAX_FIELDS_NAME_LENGTH, 517 MAX_FIELDS_NAME_LENGTH); 518 519 token = strtok( read_out_tag, ":" ); 520 token = strtok( NULL," ,;<>" ); 521 *nppf = atoi( token ); 522 SerialFile->nppf=*nppf; //TODO: sanity check of int 523 } // end of if("MPI_IO_TAG") 524 else { 525 printf("Error queryphmpiio: The file you opened is not of syncIO new format, please check! read_out_tag = %s\n",read_out_tag); 526 exit(1); 527 } 528 fclose(fileHandle); 529 free(SerialFile->masterHeader); 530 free(SerialFile); 531 } //end of else 532 free(fname); 533 } 534 535 // Bcast value to every one 536 MPI_Bcast( nfields, 1, MPI_INT, 0, MPI_COMM_WORLD ); 537 MPI_Bcast( nppf, 1, MPI_INT, 0, MPI_COMM_WORLD ); 538 MPI_Bcast( &MasterHeaderSize, 1, MPI_INT, 0, MPI_COMM_WORLD ); 539 phprintf("Info queryphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 540 } 541 542 /** 543 * This function computes the right master header size (round to size of 2^n). 544 * This is only needed for file format version 1 in "write" mode. 545 */ 546 int computeMHSize(int nfields, int nppf, int version) { 547 int mhsize; 548 if(version == 1) { 549 //int meta_info_size = (2+nfields+1) * MAX_FIELDS_NAME_LENGTH; // 2 is MPI_IO_TAG and nFields, the others 1 is nppf 550 int meta_info_size = VERSION_INFO_HEADER_SIZE; 551 int actual_size = nfields * nppf * sizeof(long long) + meta_info_size; 552 //printf("actual_size = %d, offset table size = %d\n", actual_size, nfields * nppf * sizeof(long long)); 553 if (actual_size > DefaultMHSize) { 554 mhsize = (int) ceil( (double) actual_size/DefaultMHSize); // it's rounded to ceiling of this value 555 mhsize *= DefaultMHSize; 556 } 557 else { 558 mhsize = DefaultMHSize; 559 } 560 } 561 return mhsize; 562 } 563 564 /** 565 * Computes correct color of a rank according to number of files. 566 */ 567 extern "C" int computeColor( int myrank, int numprocs, int nfiles) { 568 int color = 569 (int)(myrank / (numprocs / nfiles)); 570 return color; 571 } 572 573 574 /** 575 * Check the file descriptor. 576 */ 577 void checkFileDescriptor(const char fctname[], 578 int* fileDescriptor ) { 579 if ( *fileDescriptor < 0 ) { 580 printf("Error: File descriptor = %d in %s\n",*fileDescriptor,fctname); 581 exit(1); 582 } 583 } 584 585 /** 586 * Initialize the file struct members and allocate space for file struct 587 * buffers. 588 * 589 * Note: this function is only called when we are using new format. Old POSIX 590 * format should skip this routine and call openfile() directly instead. 591 */ 592 int initphmpiio( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[]) 593 { 594 // we init irank again in case query not called (e.g. syncIO write case) 595 MPI_Comm_rank(MPI_COMM_WORLD, &irank); 596 MPI_Comm_size(MPI_COMM_WORLD, &mysize); 597 598 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 599 600 double timer_start, timer_end; 601 startTimer(&timer_start); 602 603 char* imode = StringStripper( mode ); 604 605 // Note: if it's read, we presume query was called prior to init and 606 // MasterHeaderSize is already set to correct value from parsing header 607 // otherwise it's write then it needs some computation to be set 608 if ( cscompare( "read", imode ) ) { 609 // do nothing 610 } 611 else if( cscompare( "write", imode ) ) { 612 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 613 } 614 else { 615 printf("Error initphmpiio: can't recognize the mode %s", imode); 616 exit(1); 617 } 618 free ( imode ); 619 620 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 621 622 int i, j; 623 624 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 625 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 626 endTimer(&timer_end); 627 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 628 return MAX_PHASTA_FILES_EXCEEDED; 629 } 630 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 631 // { 632 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 633 // { 634 // PhastaIOActiveFiles[i] = NULL; 635 // } 636 // } 637 638 639 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 640 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 641 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 642 //if( mem_address & (pool_align -1) ) 643 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 644 645 i = PhastaIONextActiveIndex; 646 PhastaIONextActiveIndex++; 647 648 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 649 650 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 651 652 PhastaIOActiveFiles[i]->Wrong_Endian = false; 653 654 PhastaIOActiveFiles[i]->nFields = *nfields; 655 PhastaIOActiveFiles[i]->nPPF = *nppf; 656 PhastaIOActiveFiles[i]->nFiles = *nfiles; 657 MPI_Comm_rank(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->myrank)); 658 MPI_Comm_size(MPI_COMM_WORLD, &(PhastaIOActiveFiles[i]->numprocs)); 659 660 661 if( *nfiles > 1 ) { // split the ranks according to each mpiio file 662 663 if ( s_assign_local_comm == 0) { // call mpi_comm_split for the first (and only) time 664 665 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Building subcommunicator\n"); 666 667 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 668 MPI_Comm_split(MPI_COMM_WORLD, 669 color, 670 PhastaIOActiveFiles[i]->myrank, 671 &(PhastaIOActiveFiles[i]->local_comm)); 672 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 673 &(PhastaIOActiveFiles[i]->local_numprocs)); 674 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 675 &(PhastaIOActiveFiles[i]->local_myrank)); 676 677 // back up now these variables so that we do not need to call comm_split again 678 s_local_comm = PhastaIOActiveFiles[i]->local_comm; 679 s_local_size = PhastaIOActiveFiles[i]->local_numprocs; 680 s_local_rank = PhastaIOActiveFiles[i]->local_myrank; 681 s_assign_local_comm = 1; 682 } 683 else { // recycle the subcommunicator 684 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Recycling subcommunicator\n"); 685 PhastaIOActiveFiles[i]->local_comm = s_local_comm; 686 PhastaIOActiveFiles[i]->local_numprocs = s_local_size; 687 PhastaIOActiveFiles[i]->local_myrank = s_local_rank; 688 } 689 } 690 else { // *nfiles == 1 here - no need to call mpi_comm_split here 691 692 if (PhastaIOActiveFiles[i]->myrank == 0) printf("Bypassing subcommunicator\n"); 693 PhastaIOActiveFiles[i]->local_comm = MPI_COMM_WORLD; 694 PhastaIOActiveFiles[i]->local_numprocs = PhastaIOActiveFiles[i]->numprocs; 695 PhastaIOActiveFiles[i]->local_myrank = PhastaIOActiveFiles[i]->myrank; 696 697 } 698 699 PhastaIOActiveFiles[i]->nppp = 700 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 701 702 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 703 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 704 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 705 706 PhastaIOActiveFiles[i]->my_offset_table = 707 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 708 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 709 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 710 //if( mem_address & (pool_align -1) ) 711 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 712 713 PhastaIOActiveFiles[i]->my_read_table = 714 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 715 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 716 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 717 //if( mem_address & (pool_align -1) ) 718 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 719 720 for (j=0; j<*nfields; j++) 721 { 722 PhastaIOActiveFiles[i]->my_offset_table[j] = 723 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 724 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 725 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 726 //if( mem_address & (pool_align -1) ) 727 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 728 729 PhastaIOActiveFiles[i]->my_read_table[j] = 730 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 731 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 732 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 733 //if( mem_address & (pool_align -1) ) 734 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 735 } 736 *filehandle = i; 737 738 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 739 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 740 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 741 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 742 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 743 744 /* 745 PhastaIOActiveFiles[i]->master_header = 746 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 747 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 748 if( mem_address & (pool_align -1) ) 749 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 750 751 PhastaIOActiveFiles[i]->double_chunk = 752 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 753 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 754 if( mem_address & (pool_align -1) ) 755 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 756 757 PhastaIOActiveFiles[i]->int_chunk = 758 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 759 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 760 if( mem_address & (pool_align -1) ) 761 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 762 763 PhastaIOActiveFiles[i]->read_double_chunk = 764 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 765 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 766 if( mem_address & (pool_align -1) ) 767 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 768 769 PhastaIOActiveFiles[i]->read_int_chunk = 770 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 771 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 772 if( mem_address & (pool_align -1) ) 773 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 774 */ 775 776 // Time monitoring 777 endTimer(&timer_end); 778 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 779 780 phprintf_0("Info initphmpiio: quiting function"); 781 782 return i; 783 } 784 785 /** 786 * Destruct the file struct and free buffers allocated in init function. 787 */ 788 void finalizephmpiio( int *fileDescriptor ) 789 { 790 double timer_start, timer_end; 791 startTimer(&timer_start); 792 793 int i, j; 794 i = *fileDescriptor; 795 //PhastaIONextActiveIndex--; 796 797 /* //free the offset table for this phasta file */ 798 //for(j=0; j<MAX_FIELDS_NUMBER; j++) //Danger: undefined behavior for my_*_table.[j] not allocated or not initialized to NULL 799 for(j=0; j<PhastaIOActiveFiles[i]->nFields; j++) 800 { 801 free( PhastaIOActiveFiles[i]->my_offset_table[j]); 802 free( PhastaIOActiveFiles[i]->my_read_table[j]); 803 } 804 free ( PhastaIOActiveFiles[i]->my_offset_table ); 805 free ( PhastaIOActiveFiles[i]->my_read_table ); 806 free ( PhastaIOActiveFiles[i]->master_header ); 807 free ( PhastaIOActiveFiles[i]->double_chunk ); 808 free ( PhastaIOActiveFiles[i]->int_chunk ); 809 free ( PhastaIOActiveFiles[i]->read_double_chunk ); 810 free ( PhastaIOActiveFiles[i]->read_int_chunk ); 811 812 free( PhastaIOActiveFiles[i]); 813 814 endTimer(&timer_end); 815 printPerf("finalizempiio", timer_start, timer_end, 0, 0, ""); 816 817 PhastaIONextActiveIndex--; 818 } 819 820 821 /** 822 * Special init for M2N in order to create a subcommunicator for the reduced solution (requires PRINT_PERF to be false for now) 823 * Initialize the file struct members and allocate space for file struct buffers. 824 * 825 * Note: this function is only called when we are using new format. Old POSIX 826 * format should skip this routine and call openfile() directly instead. 827 */ 828 int initphmpiiosub( int *nfields, int *nppf, int *nfiles, int *filehandle, const char mode[],MPI_Comm my_local_comm) 829 { 830 // we init irank again in case query not called (e.g. syncIO write case) 831 832 MPI_Comm_rank(my_local_comm, &irank); 833 MPI_Comm_size(my_local_comm, &mysize); 834 835 phprintf("Info initphmpiio: entering function, myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 836 837 double timer_start, timer_end; 838 startTimer(&timer_start); 839 840 char* imode = StringStripper( mode ); 841 842 // Note: if it's read, we presume query was called prior to init and 843 // MasterHeaderSize is already set to correct value from parsing header 844 // otherwise it's write then it needs some computation to be set 845 if ( cscompare( "read", imode ) ) { 846 // do nothing 847 } 848 else if( cscompare( "write", imode ) ) { 849 MasterHeaderSize = computeMHSize(*nfields, *nppf, LATEST_WRITE_VERSION); 850 } 851 else { 852 printf("Error initphmpiio: can't recognize the mode %s", imode); 853 exit(1); 854 } 855 free ( imode ); 856 857 phprintf("Info initphmpiio: myrank = %d, MasterHeaderSize = %d", irank, MasterHeaderSize); 858 859 int i, j; 860 861 if( PhastaIONextActiveIndex == MAX_PHASTA_FILES ) { 862 printf("Error initphmpiio: PhastaIONextActiveIndex = MAX_PHASTA_FILES"); 863 endTimer(&timer_end); 864 printPerf("initphmpiio", timer_start, timer_end, 0, 0, ""); 865 return MAX_PHASTA_FILES_EXCEEDED; 866 } 867 // else if( PhastaIONextActiveIndex == 0 ) //Hang in debug mode on Intrepid 868 // { 869 // for( i = 0; i < MAX_PHASTA_FILES; i++ ); 870 // { 871 // PhastaIOActiveFiles[i] = NULL; 872 // } 873 // } 874 875 876 PhastaIOActiveFiles[PhastaIONextActiveIndex] = (phastaio_file_t *)calloc( 1, sizeof( phastaio_file_t) ); 877 //PhastaIOActiveFiles[PhastaIONextActiveIndex] = ( phastaio_file_t *)calloc( 1 + 1, sizeof( phastaio_file_t ) ); 878 //mem_address = (long long )PhastaIOActiveFiles[PhastaIONextActiveIndex]; 879 //if( mem_address & (pool_align -1) ) 880 // PhastaIOActiveFiles[PhastaIONextActiveIndex] += pool_align - (mem_address & (pool_align -1)); 881 882 i = PhastaIONextActiveIndex; 883 PhastaIONextActiveIndex++; 884 885 //PhastaIOActiveFiles[i]->next_start_address = 2*TWO_MEGABYTE; 886 887 PhastaIOActiveFiles[i]->next_start_address = MasterHeaderSize; // what does this mean??? TODO 888 889 PhastaIOActiveFiles[i]->Wrong_Endian = false; 890 891 PhastaIOActiveFiles[i]->nFields = *nfields; 892 PhastaIOActiveFiles[i]->nPPF = *nppf; 893 PhastaIOActiveFiles[i]->nFiles = *nfiles; 894 MPI_Comm_rank(my_local_comm, &(PhastaIOActiveFiles[i]->myrank)); 895 MPI_Comm_size(my_local_comm, &(PhastaIOActiveFiles[i]->numprocs)); 896 897 int color = computeColor(PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->numprocs, PhastaIOActiveFiles[i]->nFiles); 898 MPI_Comm_split(my_local_comm, 899 color, 900 PhastaIOActiveFiles[i]->myrank, 901 &(PhastaIOActiveFiles[i]->local_comm)); 902 MPI_Comm_size(PhastaIOActiveFiles[i]->local_comm, 903 &(PhastaIOActiveFiles[i]->local_numprocs)); 904 MPI_Comm_rank(PhastaIOActiveFiles[i]->local_comm, 905 &(PhastaIOActiveFiles[i]->local_myrank)); 906 PhastaIOActiveFiles[i]->nppp = 907 PhastaIOActiveFiles[i]->nPPF/PhastaIOActiveFiles[i]->local_numprocs; 908 909 PhastaIOActiveFiles[i]->start_id = PhastaIOActiveFiles[i]->nPPF * 910 (int)(PhastaIOActiveFiles[i]->myrank/PhastaIOActiveFiles[i]->local_numprocs) + 911 (PhastaIOActiveFiles[i]->local_myrank * PhastaIOActiveFiles[i]->nppp); 912 913 PhastaIOActiveFiles[i]->my_offset_table = 914 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 915 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof(unsigned long long *) ); 916 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table; 917 //if( mem_address & (pool_align -1) ) 918 // PhastaIOActiveFiles[i]->my_offset_table += pool_align - (mem_address & (pool_align -1)); 919 920 PhastaIOActiveFiles[i]->my_read_table = 921 ( unsigned long long ** ) calloc( MAX_FIELDS_NUMBER , sizeof( unsigned long long *) ); 922 //( unsigned long long **)calloc( MAX_FIELDS_NUMBER + pool_align, sizeof( unsigned long long *) ); 923 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table; 924 //if( mem_address & (pool_align -1) ) 925 // PhastaIOActiveFiles[i]->my_read_table += pool_align - (mem_address & (pool_align -1)); 926 927 for (j=0; j<*nfields; j++) 928 { 929 PhastaIOActiveFiles[i]->my_offset_table[j] = 930 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 931 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp + pool_align, sizeof( unsigned long long) ); 932 //mem_address = (long long )PhastaIOActiveFiles[i]->my_offset_table[j]; 933 //if( mem_address & (pool_align -1) ) 934 // PhastaIOActiveFiles[i]->my_offset_table[j] += pool_align - (mem_address & (pool_align -1)); 935 936 PhastaIOActiveFiles[i]->my_read_table[j] = 937 ( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) ); 938 //( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nppp , sizeof( unsigned long long) + pool_align ); 939 //mem_address = (long long )PhastaIOActiveFiles[i]->my_read_table[j]; 940 //if( mem_address & (pool_align -1) ) 941 // PhastaIOActiveFiles[i]->my_read_table[j] += pool_align - (mem_address & (pool_align -1)); 942 } 943 *filehandle = i; 944 945 PhastaIOActiveFiles[i]->master_header = (char *)calloc(MasterHeaderSize,sizeof( char )); 946 PhastaIOActiveFiles[i]->double_chunk = (double *)calloc(1,sizeof( double )); 947 PhastaIOActiveFiles[i]->int_chunk = (int *)calloc(1,sizeof( int )); 948 PhastaIOActiveFiles[i]->read_double_chunk = (double *)calloc(1,sizeof( double )); 949 PhastaIOActiveFiles[i]->read_int_chunk = (int *)calloc(1,sizeof( int )); 950 951 /* 952 PhastaIOActiveFiles[i]->master_header = 953 ( char * ) calloc( MasterHeaderSize + pool_align, sizeof( char ) ); 954 mem_address = (long long )PhastaIOActiveFiles[i]->master_header; 955 if( mem_address & (pool_align -1) ) 956 PhastaIOActiveFiles[i]->master_header += pool_align - (mem_address & (pool_align -1)); 957 958 PhastaIOActiveFiles[i]->double_chunk = 959 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 960 mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 961 if( mem_address & (pool_align -1) ) 962 PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 963 964 PhastaIOActiveFiles[i]->int_chunk = 965 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 966 mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 967 if( mem_address & (pool_align -1) ) 968 PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & (pool_align -1)); 969 970 PhastaIOActiveFiles[i]->read_double_chunk = 971 ( double * ) calloc( 1 + pool_align , sizeof( double ) ); 972 mem_address = (long long )PhastaIOActiveFiles[i]->read_double_chunk; 973 if( mem_address & (pool_align -1) ) 974 PhastaIOActiveFiles[i]->read_double_chunk += pool_align - (mem_address & (pool_align -1)); 975 976 PhastaIOActiveFiles[i]->read_int_chunk = 977 ( int * ) calloc( 1 + pool_align , sizeof( int ) ); 978 mem_address = (long long )PhastaIOActiveFiles[i]->read_int_chunk; 979 if( mem_address & (pool_align -1) ) 980 PhastaIOActiveFiles[i]->read_int_chunk += pool_align - (mem_address & (pool_align -1)); 981 */ 982 983 // Time monitoring 984 endTimer(&timer_end); 985 printPerf("initphmpiiosub", timer_start, timer_end, 0, 0, ""); 986 987 phprintf_0("Info initphmpiiosub: quiting function"); 988 989 return i; 990 } 991 992 993 994 /** open file for both POSIX and MPI-IO syncIO format. 995 * 996 * If it's old POSIX format, simply call posix fopen(). 997 * 998 * If it's MPI-IO foramt: 999 * in "read" mode, it builds the header table that points to the offset of 1000 * fields for parts; 1001 * in "write" mode, it opens the file with MPI-IO open routine. 1002 */ 1003 void openfile(const char filename[], 1004 const char mode[], 1005 int* fileDescriptor ) 1006 { 1007 phprintf_0("Info: entering openfile"); 1008 1009 double timer_start, timer_end; 1010 startTimer(&timer_start); 1011 1012 if ( PhastaIONextActiveIndex == 0 ) 1013 { 1014 FILE* file=NULL ; 1015 *fileDescriptor = 0; 1016 char* fname = StringStripper( filename ); 1017 char* imode = StringStripper( mode ); 1018 1019 if ( cscompare( "read", imode ) ) file = fopen(fname, "rb" ); 1020 else if( cscompare( "write", imode ) ) file = fopen(fname, "wb" ); 1021 else if( cscompare( "append", imode ) ) file = fopen(fname, "ab" ); 1022 1023 if ( !file ){ 1024 fprintf(stderr,"Error openfile: unable to open file %s",fname ) ; 1025 } else { 1026 fileArray.push_back( file ); 1027 byte_order.push_back( false ); 1028 header_type.push_back( sizeof(int) ); 1029 *fileDescriptor = fileArray.size(); 1030 } 1031 free (fname); 1032 free (imode); 1033 } 1034 else // else it would be parallel I/O, opposed to posix io 1035 { 1036 char* fname = StringStripper( filename ); 1037 char* imode = StringStripper( mode ); 1038 int rc; 1039 int i = *fileDescriptor; 1040 checkFileDescriptor("openfile",&i); 1041 char* token; 1042 1043 if ( cscompare( "read", imode ) ) 1044 { 1045 // if (PhastaIOActiveFiles[i]->myrank == 0) 1046 // printf("\n **********\nRead open ... ... regular version\n"); 1047 1048 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1049 fname, 1050 MPI_MODE_RDONLY, 1051 MPI_INFO_NULL, 1052 &(PhastaIOActiveFiles[i]->file_handle) ); 1053 1054 if(rc) 1055 { 1056 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1057 printf("Error openfile: Unable to open file %s! File descriptor = %d\n",fname,*fileDescriptor); 1058 endTimer(&timer_end); 1059 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1060 return; 1061 } 1062 1063 MPI_Status read_tag_status; 1064 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1065 int j; 1066 int magic_number; 1067 1068 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1069 MPI_File_read_at( PhastaIOActiveFiles[i]->file_handle, 1070 0, 1071 PhastaIOActiveFiles[i]->master_header, 1072 MasterHeaderSize, 1073 MPI_CHAR, 1074 &read_tag_status ); 1075 } 1076 1077 MPI_Bcast( PhastaIOActiveFiles[i]->master_header, 1078 MasterHeaderSize, 1079 MPI_CHAR, 1080 0, 1081 PhastaIOActiveFiles[i]->local_comm ); 1082 1083 memcpy( read_out_tag, 1084 PhastaIOActiveFiles[i]->master_header, 1085 MAX_FIELDS_NAME_LENGTH-1 ); 1086 1087 if ( cscompare ("MPI_IO_Tag",read_out_tag) ) 1088 { 1089 // Test endianess ... 1090 memcpy ( &magic_number, 1091 PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1092 sizeof(int) ); // masterheader should look like "MPI_IO_Tag : 12180 " with 12180 in binary format 1093 1094 if ( magic_number != ENDIAN_TEST_NUMBER ) 1095 { 1096 PhastaIOActiveFiles[i]->Wrong_Endian = true; 1097 } 1098 1099 memcpy( read_out_tag, 1100 PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH+1, // TODO: WHY +1??? 1101 MAX_FIELDS_NAME_LENGTH ); 1102 1103 // Read in # fields ... 1104 token = strtok ( read_out_tag, ":" ); 1105 token = strtok( NULL," ,;<>" ); 1106 PhastaIOActiveFiles[i]->nFields = atoi( token ); 1107 1108 unsigned long long **header_table; 1109 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1110 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1111 //mem_address = (long long )header_table; 1112 //if( mem_address & (pool_align -1) ) 1113 // header_table += pool_align - (mem_address & (pool_align -1)); 1114 1115 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1116 { 1117 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1118 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof(unsigned long long *)); 1119 //mem_address = (long long )header_table[j]; 1120 //if( mem_address & (pool_align -1) ) 1121 // header_table[j] += pool_align - (mem_address & (pool_align -1)); 1122 } 1123 1124 // Read in the offset table ... 1125 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) 1126 { 1127 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1128 memcpy( header_table[j], 1129 PhastaIOActiveFiles[i]->master_header + 1130 VERSION_INFO_HEADER_SIZE + 1131 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1132 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1133 } 1134 1135 MPI_Scatter( header_table[j], 1136 PhastaIOActiveFiles[i]->nppp, 1137 MPI_LONG_LONG_INT, 1138 PhastaIOActiveFiles[i]->my_read_table[j], 1139 PhastaIOActiveFiles[i]->nppp, 1140 MPI_LONG_LONG_INT, 1141 0, 1142 PhastaIOActiveFiles[i]->local_comm ); 1143 1144 // Swap byte order if endianess is different ... 1145 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) { 1146 SwapArrayByteOrder( PhastaIOActiveFiles[i]->my_read_table[j], 1147 sizeof(long long int), 1148 PhastaIOActiveFiles[i]->nppp ); 1149 } 1150 } 1151 1152 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1153 free ( header_table[j] ); 1154 } 1155 free (header_table); 1156 1157 } // end of if MPI_IO_TAG 1158 else //else not valid MPI file 1159 { 1160 *fileDescriptor = NOT_A_MPI_FILE; 1161 printf("Error openfile: The file %s you opened is not in syncIO new format, please check again! File descriptor = %d, MasterHeaderSize = %d, read_out_tag = %s\n",fname,*fileDescriptor,MasterHeaderSize,read_out_tag); 1162 //Printing MasterHeaderSize is useful to test a compiler bug on Intrepid BGP 1163 endTimer(&timer_end); 1164 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1165 return; 1166 } 1167 } // end of if "read" 1168 else if( cscompare( "write", imode ) ) 1169 { 1170 rc = MPI_File_open( PhastaIOActiveFiles[i]->local_comm, 1171 fname, 1172 MPI_MODE_WRONLY | MPI_MODE_CREATE, 1173 MPI_INFO_NULL, 1174 &(PhastaIOActiveFiles[i]->file_handle) ); 1175 if(rc) 1176 { 1177 *fileDescriptor = UNABLE_TO_OPEN_FILE; 1178 return; 1179 } 1180 } // end of if "write" 1181 free (fname); 1182 free (imode); 1183 } // end of if FileIndex != 0 1184 1185 endTimer(&timer_end); 1186 printPerf("openfile", timer_start, timer_end, 0, 0, ""); 1187 } 1188 1189 /** close file for both POSIX and MPI-IO syncIO format. 1190 * 1191 * If it's old POSIX format, simply call posix fclose(). 1192 * 1193 * If it's MPI-IO foramt: 1194 * in "read" mode, it simply close file with MPI-IO close routine. 1195 * in "write" mode, rank 0 in each group will re-assemble the master header and 1196 * offset table and write to the beginning of file, then close the file. 1197 */ 1198 void closefile( int* fileDescriptor, 1199 const char mode[] ) 1200 { 1201 double timer_start, timer_end; 1202 startTimer(&timer_start); 1203 1204 int i = *fileDescriptor; 1205 checkFileDescriptor("closefile",&i); 1206 1207 if ( PhastaIONextActiveIndex == 0 ) { 1208 char* imode = StringStripper( mode ); 1209 1210 if( cscompare( "write", imode ) 1211 || cscompare( "append", imode ) ) { 1212 fflush( fileArray[ *fileDescriptor - 1 ] ); 1213 } 1214 1215 fclose( fileArray[ *fileDescriptor - 1 ] ); 1216 free (imode); 1217 } 1218 else { 1219 char* imode = StringStripper( mode ); 1220 1221 //write master header here: 1222 if ( cscompare( "write", imode ) ) { 1223 // if ( PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields < 2*ONE_MEGABYTE/8 ) //SHOULD BE CHECKED 1224 // MasterHeaderSize = 4*ONE_MEGABYTE; 1225 // else 1226 // MasterHeaderSize = 4*ONE_MEGABYTE + PhastaIOActiveFiles[i]->nPPF * PhastaIOActiveFiles[i]->nFields * 8 - 2*ONE_MEGABYTE; 1227 1228 MasterHeaderSize = computeMHSize( PhastaIOActiveFiles[i]->nFields, PhastaIOActiveFiles[i]->nPPF, LATEST_WRITE_VERSION); 1229 phprintf_0("Info closefile: myrank = %d, MasterHeaderSize = %d\n", PhastaIOActiveFiles[i]->myrank, MasterHeaderSize); 1230 1231 MPI_Status write_header_status; 1232 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1233 char version[MAX_FIELDS_NAME_LENGTH/4]; 1234 char mhsize[MAX_FIELDS_NAME_LENGTH/4]; 1235 int magic_number = ENDIAN_TEST_NUMBER; 1236 1237 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) 1238 { 1239 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1240 sprintf(mpi_tag, "MPI_IO_Tag : "); 1241 memcpy(PhastaIOActiveFiles[i]->master_header, 1242 mpi_tag, 1243 MAX_FIELDS_NAME_LENGTH); 1244 1245 bzero((void*)version,MAX_FIELDS_NAME_LENGTH/4); 1246 // this version is "1", print version in ASCII 1247 sprintf(version, "version : %d",1); 1248 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/2, 1249 version, 1250 MAX_FIELDS_NAME_LENGTH/4); 1251 1252 // master header size is computed using the formula above 1253 bzero((void*)mhsize,MAX_FIELDS_NAME_LENGTH/4); 1254 sprintf(mhsize, "mhsize : "); 1255 memcpy(PhastaIOActiveFiles[i]->master_header + MAX_FIELDS_NAME_LENGTH/4*3, 1256 mhsize, 1257 MAX_FIELDS_NAME_LENGTH/4); 1258 1259 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1260 sprintf(mpi_tag, 1261 "\nnFields : %d\n", 1262 PhastaIOActiveFiles[i]->nFields); 1263 memcpy(PhastaIOActiveFiles[i]->master_header+MAX_FIELDS_NAME_LENGTH, 1264 mpi_tag, 1265 MAX_FIELDS_NAME_LENGTH); 1266 1267 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1268 sprintf(mpi_tag, "\nnPPF : %d\n", PhastaIOActiveFiles[i]->nPPF); 1269 memcpy( PhastaIOActiveFiles[i]->master_header+ 1270 PhastaIOActiveFiles[i]->nFields * 1271 MAX_FIELDS_NAME_LENGTH + 1272 MAX_FIELDS_NAME_LENGTH * 2, 1273 mpi_tag, 1274 MAX_FIELDS_NAME_LENGTH); 1275 1276 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("MPI_IO_Tag : ")-1, //-1 sizeof returns the size of the string+1 for "\0" 1277 &magic_number, 1278 sizeof(int)); 1279 1280 memcpy( PhastaIOActiveFiles[i]->master_header+sizeof("mhsize : ") -1 + MAX_FIELDS_NAME_LENGTH/4*3, 1281 &MasterHeaderSize, 1282 sizeof(int)); 1283 } 1284 1285 int j = 0; 1286 unsigned long long **header_table; 1287 header_table = ( unsigned long long ** )calloc(PhastaIOActiveFiles[i]->nFields, sizeof(unsigned long long *)); 1288 //header_table = ( unsigned long long ** ) calloc( PhastaIOActiveFiles[i]->nFields + pool_align, sizeof(unsigned long long *)); 1289 //mem_address = (long long )header_table; 1290 //if( mem_address & (pool_align -1) ) 1291 // header_table += pool_align - (mem_address & (pool_align -1)); 1292 1293 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1294 header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF , sizeof( unsigned long long)); 1295 //header_table[j]=( unsigned long long * ) calloc( PhastaIOActiveFiles[i]->nPPF + pool_align, sizeof (unsigned long long *)); 1296 //mem_address = (long long )header_table[j]; 1297 //if( mem_address & (pool_align -1) ) 1298 // header_table[j] += pool_align - (mem_address & (pool_align - 1)); 1299 } 1300 1301 //if( irank == 0 ) printf("gonna mpi_gather, myrank = %d\n", irank); 1302 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1303 MPI_Gather( PhastaIOActiveFiles[i]->my_offset_table[j], 1304 PhastaIOActiveFiles[i]->nppp, 1305 MPI_LONG_LONG_INT, 1306 header_table[j], 1307 PhastaIOActiveFiles[i]->nppp, 1308 MPI_LONG_LONG_INT, 1309 0, 1310 PhastaIOActiveFiles[i]->local_comm ); 1311 } 1312 1313 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1314 1315 //if( irank == 0 ) printf("gonna memcpy for every procs, myrank = %d\n", irank); 1316 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1317 memcpy ( PhastaIOActiveFiles[i]->master_header + 1318 VERSION_INFO_HEADER_SIZE + 1319 j * PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long), 1320 header_table[j], 1321 PhastaIOActiveFiles[i]->nPPF * sizeof(unsigned long long) ); 1322 } 1323 1324 //if( irank == 0 ) printf("gonna file_write_at(), myrank = %d\n", irank); 1325 MPI_File_write_at( PhastaIOActiveFiles[i]->file_handle, 1326 0, 1327 PhastaIOActiveFiles[i]->master_header, 1328 MasterHeaderSize, 1329 MPI_CHAR, 1330 &write_header_status ); 1331 } 1332 1333 ////free(PhastaIOActiveFiles[i]->master_header); 1334 1335 for ( j = 0; j < PhastaIOActiveFiles[i]->nFields; j++ ) { 1336 free ( header_table[j] ); 1337 } 1338 free (header_table); 1339 } 1340 1341 //if( irank == 0 ) printf("gonna file_close(), myrank = %d\n", irank); 1342 MPI_File_close( &( PhastaIOActiveFiles[i]->file_handle ) ); 1343 free ( imode ); 1344 } 1345 1346 endTimer(&timer_end); 1347 printPerf("closefile_", timer_start, timer_end, 0, 0, ""); 1348 } 1349 1350 void readheader( int* fileDescriptor, 1351 const char keyphrase[], 1352 void* valueArray, 1353 int* nItems, 1354 const char datatype[], 1355 const char iotype[] ) 1356 { 1357 double timer_start, timer_end; 1358 //MPI_Comm_rank(MPI_COMM_WORLD, &irank); //This should not be required if irank is indeed a global variable. irank should be initialized by either query and/or init 1359 //if(irank == 0) printf("entering readheader() - %s\n", keyphrase); 1360 startTimer(&timer_start); 1361 1362 int i = *fileDescriptor; 1363 checkFileDescriptor("readheader",&i); 1364 1365 if ( PhastaIONextActiveIndex == 0 ) { 1366 int filePtr = *fileDescriptor - 1; 1367 FILE* fileObject; 1368 int* valueListInt; 1369 1370 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1371 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1372 fprintf(stderr,"openfile_ function has to be called before \n") ; 1373 fprintf(stderr,"acessing the file\n ") ; 1374 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1375 endTimer(&timer_end); 1376 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1377 return; 1378 } 1379 1380 LastHeaderKey[ filePtr ] = const_cast< char* >( keyphrase ); 1381 LastHeaderNotFound = false; 1382 1383 fileObject = fileArray[ filePtr ] ; 1384 Wrong_Endian = byte_order[ filePtr ]; 1385 1386 isBinary( iotype ); 1387 typeSize( datatype ); //redundant call, just avoid a compiler warning. 1388 1389 // right now we are making the assumption that we will only write integers 1390 // on the header line. 1391 1392 valueListInt = static_cast< int* >( valueArray ); 1393 int ierr = readHeader( fileObject , 1394 keyphrase, 1395 valueListInt, 1396 *nItems ) ; 1397 1398 byte_order[ filePtr ] = Wrong_Endian ; 1399 1400 if ( ierr ) LastHeaderNotFound = true; 1401 1402 //return ; // don't return, go to the end to print perf 1403 } 1404 else { 1405 unsigned int skip_size; 1406 int* valueListInt; 1407 valueListInt = static_cast <int*>(valueArray); 1408 char* token; 1409 bool FOUND = false ; 1410 isBinary( iotype ); 1411 1412 MPI_Status read_offset_status; 1413 char read_out_tag[MAX_FIELDS_NAME_LENGTH]; 1414 char readouttag[MAX_FIELDS_NUMBER][MAX_FIELDS_NAME_LENGTH]; 1415 int j; 1416 1417 int string_length = strlen( keyphrase ); 1418 char* buffer = (char*) malloc ( string_length+1 ); 1419 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1420 //mem_address = (long long )buffer; 1421 //if( mem_address & (pool_align -1) ) 1422 // buffer += pool_align - (mem_address & (pool_align -1)); 1423 1424 strcpy ( buffer, keyphrase ); 1425 buffer[ string_length ] = '\0'; 1426 1427 char* st2 = strtok ( buffer, "@" ); 1428 st2 = strtok (NULL, "@"); 1429 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1430 if ( char* p = strpbrk(buffer, "@") ) 1431 *p = '\0'; 1432 1433 // Check if the user has input the right GPid 1434 if ( ( PhastaIOActiveFiles[i]->GPid <= 1435 PhastaIOActiveFiles[i]->myrank * 1436 PhastaIOActiveFiles[i]->nppp )|| 1437 ( PhastaIOActiveFiles[i]->GPid > 1438 ( PhastaIOActiveFiles[i]->myrank + 1 ) * 1439 PhastaIOActiveFiles[i]->nppp ) ) 1440 { 1441 *fileDescriptor = NOT_A_MPI_FILE; 1442 printf("Error readheader: The file is not in syncIO new format, please check! myrank = %d, GPid = %d, nppp = %d, keyphrase = %s\n", PhastaIOActiveFiles[i]->myrank, PhastaIOActiveFiles[i]->GPid, PhastaIOActiveFiles[i]->nppp, keyphrase); 1443 // It is possible atoi could not generate a clear integer from st2 because of additional garbage character in keyphrase 1444 endTimer(&timer_end); 1445 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1446 return; 1447 } 1448 1449 // Find the field we want ... 1450 //for ( j = 0; j<MAX_FIELDS_NUMBER; j++ ) 1451 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1452 { 1453 memcpy( readouttag[j], 1454 PhastaIOActiveFiles[i]->master_header + j*MAX_FIELDS_NAME_LENGTH+MAX_FIELDS_NAME_LENGTH*2+1, 1455 MAX_FIELDS_NAME_LENGTH-1 ); 1456 } 1457 1458 for ( j = 0; j<PhastaIOActiveFiles[i]->nFields; j++ ) 1459 { 1460 token = strtok ( readouttag[j], ":" ); 1461 1462 //if ( cscompare( buffer, token ) ) 1463 if ( cscompare( token , buffer ) && cscompare( buffer, token ) ) 1464 // This double comparison is required for the field "number of nodes" and all the other fields that start with "number of nodes" (i.g. number of nodes in the mesh"). 1465 // Would be safer to rename "number of nodes" by "number of nodes in the part" so that the name are completely unique. But much more work to do that (Nspre, phParAdapt, etc). 1466 // Since the field name are unique in SyncIO (as it includes part ID), this should be safe and there should be no issue with the "?" trailing character. 1467 { 1468 PhastaIOActiveFiles[i]->read_field_count = j; 1469 FOUND = true; 1470 //printf("buffer: %s | token: %s | j: %d\n",buffer,token,j); 1471 break; 1472 } 1473 } 1474 free(buffer); 1475 1476 if (!FOUND) 1477 { 1478 //if(irank==0) printf("Warning readheader: Not found %s \n",keyphrase); //PhastaIOActiveFiles[i]->myrank is certainly initialized here. 1479 if(PhastaIOActiveFiles[i]->myrank == 0) printf("WARNING readheader: Not found %s\n",keyphrase); 1480 endTimer(&timer_end); 1481 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1482 return; 1483 } 1484 1485 // Find the part we want ... 1486 PhastaIOActiveFiles[i]->read_part_count = PhastaIOActiveFiles[i]->GPid - 1487 PhastaIOActiveFiles[i]->myrank * PhastaIOActiveFiles[i]->nppp - 1; 1488 1489 PhastaIOActiveFiles[i]->my_offset = 1490 PhastaIOActiveFiles[i]->my_read_table[PhastaIOActiveFiles[i]->read_field_count][PhastaIOActiveFiles[i]->read_part_count]; 1491 1492 // printf("****Rank %d offset is %d\n",PhastaIOActiveFiles[i]->myrank,PhastaIOActiveFiles[i]->my_offset); 1493 1494 // Read each datablock header here ... 1495 1496 MPI_File_read_at_all( PhastaIOActiveFiles[i]->file_handle, 1497 PhastaIOActiveFiles[i]->my_offset+1, 1498 read_out_tag, 1499 MAX_FIELDS_NAME_LENGTH-1, 1500 MPI_CHAR, 1501 &read_offset_status ); 1502 token = strtok ( read_out_tag, ":" ); 1503 1504 // printf("&&&&Rank %d read_out_tag is %s\n",PhastaIOActiveFiles[i]->myrank,read_out_tag); 1505 1506 if( cscompare( keyphrase , token ) ) //No need to compare also token with keyphrase like above. We should already have the right one. Otherwise there is a problem. 1507 { 1508 FOUND = true ; 1509 token = strtok( NULL, " ,;<>" ); 1510 skip_size = atoi( token ); 1511 for( j=0; j < *nItems && ( token = strtok( NULL," ,;<>") ); j++ ) 1512 valueListInt[j] = atoi( token ); 1513 1514 if ( j < *nItems ) 1515 { 1516 fprintf( stderr, "Expected # of ints not found for: %s\n", keyphrase ); 1517 } 1518 } 1519 else { 1520 //if(irank==0) 1521 if(PhastaIOActiveFiles[i]->myrank == 0) 1522 // If we enter this if, there is a problem with the name of some fields 1523 { 1524 printf("Error readheader: Unexpected mismatch between keyphrase = %s and token = %s\n",keyphrase,token); 1525 } 1526 } 1527 } 1528 1529 endTimer(&timer_end); 1530 printPerf("readheader", timer_start, timer_end, 0, 0, ""); 1531 1532 } 1533 1534 void readdatablock( int* fileDescriptor, 1535 const char keyphrase[], 1536 void* valueArray, 1537 int* nItems, 1538 const char datatype[], 1539 const char iotype[] ) 1540 { 1541 1542 //if(irank == 0) printf("entering readdatablock()\n"); 1543 unsigned long long data_size = 0; 1544 double timer_start, timer_end; 1545 startTimer(&timer_start); 1546 1547 int i = *fileDescriptor; 1548 checkFileDescriptor("readdatablock",&i); 1549 1550 if ( PhastaIONextActiveIndex == 0 ) { 1551 int filePtr = *fileDescriptor - 1; 1552 FILE* fileObject; 1553 char junk; 1554 1555 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1556 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1557 fprintf(stderr,"openfile_ function has to be called before\n") ; 1558 fprintf(stderr,"acessing the file\n ") ; 1559 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1560 endTimer(&timer_end); 1561 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1562 return; 1563 } 1564 1565 // error check.. 1566 // since we require that a consistant header always preceed the data block 1567 // let us check to see that it is actually the case. 1568 1569 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1570 fprintf(stderr, "Header not consistant with data block\n"); 1571 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1572 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1573 fprintf(stderr, "Please recheck read sequence \n"); 1574 if( Strict_Error ) { 1575 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1576 endTimer(&timer_end); 1577 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1578 return; 1579 } 1580 } 1581 1582 if ( LastHeaderNotFound ) { 1583 endTimer(&timer_end); 1584 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1585 return; 1586 } 1587 fileObject = fileArray[ filePtr ]; 1588 Wrong_Endian = byte_order[ filePtr ]; 1589 1590 size_t type_size = typeSize( datatype ); 1591 int nUnits = *nItems; 1592 isBinary( iotype ); 1593 1594 if ( binary_format ) { 1595 fread( valueArray, type_size, nUnits, fileObject ); 1596 fread( &junk, sizeof(char), 1 , fileObject ); 1597 if ( Wrong_Endian ) SwapArrayByteOrder( valueArray, type_size, nUnits ); 1598 } else { 1599 1600 char* ts1 = StringStripper( datatype ); 1601 if ( cscompare( "integer", ts1 ) ) { 1602 for( int n=0; n < nUnits ; n++ ) 1603 fscanf(fileObject, "%d\n",(int*)((int*)valueArray+n) ); 1604 } else if ( cscompare( "double", ts1 ) ) { 1605 for( int n=0; n < nUnits ; n++ ) 1606 fscanf(fileObject, "%lf\n",(double*)((double*)valueArray+n) ); 1607 } 1608 free (ts1); 1609 } 1610 1611 //return; 1612 } 1613 else { 1614 // printf("read data block\n"); 1615 MPI_Status read_data_status; 1616 size_t type_size = typeSize( datatype ); 1617 int nUnits = *nItems; 1618 isBinary( iotype ); 1619 1620 // read datablock then 1621 //MR CHANGE 1622 // if ( cscompare ( datatype, "double")) 1623 char* ts2 = StringStripper( datatype ); 1624 if ( cscompare ( "double" , ts2)) 1625 //MR CHANGE END 1626 { 1627 1628 MPI_File_read_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 1629 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1630 valueArray, 1631 nUnits, 1632 MPI_DOUBLE ); 1633 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1634 valueArray, 1635 &read_data_status ); 1636 data_size=8*nUnits; 1637 1638 } 1639 //MR CHANGE 1640 // else if ( cscompare ( datatype, "integer")) 1641 else if ( cscompare ( "integer" , ts2)) 1642 //MR CHANGE END 1643 { 1644 MPI_File_read_at_all_begin(PhastaIOActiveFiles[i]->file_handle, 1645 PhastaIOActiveFiles[i]->my_offset + DB_HEADER_SIZE, 1646 valueArray, 1647 nUnits, 1648 MPI_INT ); 1649 MPI_File_read_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1650 valueArray, 1651 &read_data_status ); 1652 data_size=4*nUnits; 1653 } 1654 else 1655 { 1656 *fileDescriptor = DATA_TYPE_ILLEGAL; 1657 printf("readdatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 1658 endTimer(&timer_end); 1659 printPerf("readdatablock", timer_start, timer_end, 0, 0, ""); 1660 return; 1661 } 1662 free(ts2); 1663 1664 1665 // printf("%d Read finishe\n",PhastaIOActiveFiles[i]->myrank); 1666 1667 // Swap data byte order if endianess is different ... 1668 if ( PhastaIOActiveFiles[i]->Wrong_Endian ) 1669 { 1670 SwapArrayByteOrder( valueArray, type_size, nUnits ); 1671 } 1672 } 1673 1674 endTimer(&timer_end); 1675 char extra_msg[1024]; 1676 memset(extra_msg, '\0', 1024); 1677 char* key = StringStripper(keyphrase); 1678 sprintf(extra_msg, " field is %s ", key); 1679 printPerf("readdatablock", timer_start, timer_end, data_size, 1, extra_msg); 1680 free(key); 1681 1682 } 1683 1684 void writeheader( const int* fileDescriptor, 1685 const char keyphrase[], 1686 const void* valueArray, 1687 const int* nItems, 1688 const int* ndataItems, 1689 const char datatype[], 1690 const char iotype[]) 1691 { 1692 1693 //if(irank == 0) printf("entering writeheader()\n"); 1694 1695 double timer_start, timer_end; 1696 startTimer(&timer_start); 1697 1698 int i = *fileDescriptor; 1699 checkFileDescriptor("writeheader",&i); 1700 1701 if ( PhastaIONextActiveIndex == 0 ) { 1702 int filePtr = *fileDescriptor - 1; 1703 FILE* fileObject; 1704 1705 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1706 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1707 fprintf(stderr,"openfile_ function has to be called before \n") ; 1708 fprintf(stderr,"acessing the file\n ") ; 1709 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1710 endTimer(&timer_end); 1711 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1712 return; 1713 } 1714 1715 LastHeaderKey[ filePtr ] = const_cast< char* >( keyphrase ); 1716 DataSize = *ndataItems; 1717 fileObject = fileArray[ filePtr ] ; 1718 size_t type_size = typeSize( datatype ); 1719 isBinary( iotype ); 1720 header_type[ filePtr ] = type_size; 1721 1722 int _newline = ( *ndataItems > 0 ) ? sizeof( char ) : 0; 1723 int size_of_nextblock = 1724 ( binary_format ) ? type_size*( *ndataItems )+ _newline : *ndataItems ; 1725 1726 fprintf( fileObject, "%s : < %d > ", keyphrase, size_of_nextblock ); 1727 for( int i = 0; i < *nItems; i++ ) 1728 fprintf(fileObject, "%d ", *((int*)((int*)valueArray+i))); 1729 fprintf(fileObject, "\n"); 1730 1731 //return ; 1732 } 1733 else { // else it's parallel I/O 1734 DataSize = *ndataItems; 1735 size_t type_size = typeSize( datatype ); 1736 isBinary( iotype ); 1737 char mpi_tag[MAX_FIELDS_NAME_LENGTH]; 1738 1739 int string_length = strlen( keyphrase ); 1740 char* buffer = (char*) malloc ( string_length+1 ); 1741 //char* buffer = ( char * ) malloc( string_length + 1 + pool_align ); 1742 //mem_address = (long long )buffer; 1743 //if( mem_address & (pool_align -1) ) 1744 // buffer += pool_align - (mem_address & (pool_align -1)); 1745 1746 strcpy ( buffer, keyphrase); 1747 buffer[ string_length ] = '\0'; 1748 1749 char* st2 = strtok ( buffer, "@" ); 1750 st2 = strtok (NULL, "@"); 1751 PhastaIOActiveFiles[i]->GPid = atoi(st2); 1752 1753 if ( char* p = strpbrk(buffer, "@") ) 1754 *p = '\0'; 1755 1756 bzero((void*)mpi_tag,MAX_FIELDS_NAME_LENGTH); 1757 sprintf(mpi_tag, "\n%s : %d\n", buffer, PhastaIOActiveFiles[i]->field_count); 1758 unsigned long long offset_value; 1759 1760 int temp = *ndataItems; 1761 unsigned long long number_of_items = (unsigned long long)temp; 1762 MPI_Barrier(PhastaIOActiveFiles[i]->local_comm); 1763 1764 MPI_Scan( &number_of_items, 1765 &offset_value, 1766 1, 1767 MPI_LONG_LONG_INT, 1768 MPI_SUM, 1769 PhastaIOActiveFiles[i]->local_comm ); 1770 1771 offset_value = (offset_value - number_of_items) * type_size; 1772 1773 offset_value += PhastaIOActiveFiles[i]->local_myrank * 1774 DB_HEADER_SIZE + 1775 PhastaIOActiveFiles[i]->next_start_address; 1776 // This offset is the starting address of each datablock header... 1777 PhastaIOActiveFiles[i]->my_offset = offset_value; 1778 1779 // Write in my offset table ... 1780 PhastaIOActiveFiles[i]->my_offset_table[PhastaIOActiveFiles[i]->field_count][PhastaIOActiveFiles[i]->part_count] = 1781 PhastaIOActiveFiles[i]->my_offset; 1782 1783 // Update the next-start-address ... 1784 PhastaIOActiveFiles[i]->next_start_address = offset_value + 1785 number_of_items * type_size + 1786 DB_HEADER_SIZE; 1787 MPI_Bcast( &(PhastaIOActiveFiles[i]->next_start_address), 1788 1, 1789 MPI_LONG_LONG_INT, 1790 PhastaIOActiveFiles[i]->local_numprocs-1, 1791 PhastaIOActiveFiles[i]->local_comm ); 1792 1793 // Prepare datablock header ... 1794 int _newline = (*ndataItems>0)?sizeof(char):0; 1795 unsigned int size_of_nextblock = type_size * (*ndataItems) + _newline; 1796 1797 //char datablock_header[255]; 1798 //bzero((void*)datablock_header,255); 1799 char datablock_header[DB_HEADER_SIZE]; 1800 bzero((void*)datablock_header,DB_HEADER_SIZE); 1801 1802 PhastaIOActiveFiles[i]->GPid = PhastaIOActiveFiles[i]->nppp*PhastaIOActiveFiles[i]->myrank+PhastaIOActiveFiles[i]->part_count; 1803 sprintf( datablock_header, 1804 "\n%s : < %u >", 1805 keyphrase, 1806 size_of_nextblock ); 1807 1808 for ( int j = 0; j < *nItems; j++ ) 1809 { 1810 sprintf( datablock_header, 1811 "%s %d ", 1812 datablock_header, 1813 *((int*)((int*)valueArray+j))); 1814 } 1815 sprintf( datablock_header, 1816 "%s\n ", 1817 datablock_header ); 1818 1819 // Write datablock header ... 1820 //MR CHANGE 1821 // if ( cscompare(datatype,"double") ) 1822 char* ts1 = StringStripper( datatype ); 1823 if ( cscompare("double",ts1) ) 1824 //MR CHANGE END 1825 { 1826 free ( PhastaIOActiveFiles[i]->double_chunk ); 1827 PhastaIOActiveFiles[i]->double_chunk = ( double * )malloc( (sizeof( double )*number_of_items+ DB_HEADER_SIZE)); 1828 //PhastaIOActiveFiles[i]->double_chunk = ( double * ) malloc( sizeof( double )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1829 //mem_address = (long long )PhastaIOActiveFiles[i]->double_chunk; 1830 //if( mem_address & (pool_align -1) ) 1831 // PhastaIOActiveFiles[i]->double_chunk += pool_align - (mem_address & (pool_align -1)); 1832 1833 double * aa = ( double * )datablock_header; 1834 memcpy(PhastaIOActiveFiles[i]->double_chunk, aa, DB_HEADER_SIZE); 1835 } 1836 //MR CHANGE 1837 // if ( cscompare(datatype,"integer") ) 1838 else if ( cscompare("integer",ts1) ) 1839 //MR CHANGE END 1840 { 1841 free ( PhastaIOActiveFiles[i]->int_chunk ); 1842 PhastaIOActiveFiles[i]->int_chunk = ( int * )malloc( (sizeof( int )*number_of_items+ DB_HEADER_SIZE)); 1843 //PhastaIOActiveFiles[i]->int_chunk = ( int * ) malloc( sizeof( int )*number_of_items+ DB_HEADER_SIZE + pool_align ); 1844 //mem_address = (long long )PhastaIOActiveFiles[i]->int_chunk; 1845 //if( mem_address & (pool_align -1) ) 1846 // PhastaIOActiveFiles[i]->int_chunk += pool_align - (mem_address & ( pool_align -1)); 1847 1848 int * aa = ( int * )datablock_header; 1849 memcpy(PhastaIOActiveFiles[i]->int_chunk, aa, DB_HEADER_SIZE); 1850 } 1851 else { 1852 // *fileDescriptor = DATA_TYPE_ILLEGAL; 1853 printf("writeheader - DATA_TYPE_ILLEGAL - %s\n",datatype); 1854 endTimer(&timer_end); 1855 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1856 return; 1857 } 1858 free(ts1); 1859 1860 PhastaIOActiveFiles[i]->part_count++; 1861 if ( PhastaIOActiveFiles[i]->part_count == PhastaIOActiveFiles[i]->nppp ) { 1862 //A new field will be written 1863 if ( PhastaIOActiveFiles[i]->local_myrank == 0 ) { 1864 memcpy( PhastaIOActiveFiles[i]->master_header + 1865 PhastaIOActiveFiles[i]->field_count * 1866 MAX_FIELDS_NAME_LENGTH + 1867 MAX_FIELDS_NAME_LENGTH * 2, 1868 mpi_tag, 1869 MAX_FIELDS_NAME_LENGTH-1); 1870 } 1871 PhastaIOActiveFiles[i]->field_count++; 1872 PhastaIOActiveFiles[i]->part_count=0; 1873 } 1874 free(buffer); 1875 } 1876 1877 endTimer(&timer_end); 1878 printPerf("writeheader", timer_start, timer_end, 0, 0, ""); 1879 } 1880 1881 void writedatablock( const int* fileDescriptor, 1882 const char keyphrase[], 1883 const void* valueArray, 1884 const int* nItems, 1885 const char datatype[], 1886 const char iotype[] ) 1887 { 1888 //if(irank == 0) printf("entering writedatablock()\n"); 1889 1890 unsigned long long data_size = 0; 1891 double timer_start, timer_end; 1892 startTimer(&timer_start); 1893 1894 int i = *fileDescriptor; 1895 checkFileDescriptor("writedatablock",&i); 1896 1897 if ( PhastaIONextActiveIndex == 0 ) { 1898 int filePtr = *fileDescriptor - 1; 1899 1900 if ( *fileDescriptor < 1 || *fileDescriptor > (int)fileArray.size() ) { 1901 fprintf(stderr,"No file associated with Descriptor %d\n",*fileDescriptor); 1902 fprintf(stderr,"openfile_ function has to be called before \n") ; 1903 fprintf(stderr,"acessing the file\n ") ; 1904 fprintf(stderr,"fatal error: cannot continue, returning out of call\n"); 1905 endTimer(&timer_end); 1906 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1907 return; 1908 } 1909 // since we require that a consistant header always preceed the data block 1910 // let us check to see that it is actually the case. 1911 1912 if ( ! cscompare( LastHeaderKey[ filePtr ], keyphrase ) ) { 1913 fprintf(stderr, "Header not consistant with data block\n"); 1914 fprintf(stderr, "Header: %s\n", LastHeaderKey[ filePtr ] ); 1915 fprintf(stderr, "DataBlock: %s\n ", keyphrase ); 1916 fprintf(stderr, "Please recheck write sequence \n"); 1917 if( Strict_Error ) { 1918 fprintf(stderr, "fatal error: cannot continue, returning out of call\n"); 1919 endTimer(&timer_end); 1920 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1921 return; 1922 } 1923 } 1924 1925 FILE* fileObject = fileArray[ filePtr ] ; 1926 size_t type_size=typeSize( datatype ); 1927 isBinary( iotype ); 1928 1929 if ( header_type[filePtr] != (int)type_size ) { 1930 fprintf(stderr,"header and datablock differ on typeof data in the block for\n"); 1931 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1932 if( Strict_Error ) { 1933 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1934 endTimer(&timer_end); 1935 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1936 return; 1937 } 1938 } 1939 1940 int nUnits = *nItems; 1941 1942 if ( nUnits != DataSize ) { 1943 fprintf(stderr,"header and datablock differ on number of data items for\n"); 1944 fprintf(stderr,"keyphrase : %s\n", keyphrase); 1945 if( Strict_Error ) { 1946 fprintf(stderr,"fatal error: cannot continue, returning out of call\n" ); 1947 endTimer(&timer_end); 1948 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 1949 return; 1950 } 1951 } 1952 1953 if ( binary_format ) { 1954 1955 fwrite( valueArray, type_size, nUnits, fileObject ); 1956 fprintf( fileObject,"\n"); 1957 1958 } else { 1959 1960 char* ts1 = StringStripper( datatype ); 1961 if ( cscompare( "integer", ts1 ) ) { 1962 for( int n=0; n < nUnits ; n++ ) 1963 fprintf(fileObject,"%d\n",*((int*)((int*)valueArray+n))); 1964 } else if ( cscompare( "double", ts1 ) ) { 1965 for( int n=0; n < nUnits ; n++ ) 1966 fprintf(fileObject,"%lf\n",*((double*)((double*)valueArray+n))); 1967 } 1968 free (ts1); 1969 } 1970 //return ; 1971 } 1972 else { // syncIO case 1973 MPI_Status write_data_status; 1974 isBinary( iotype ); 1975 int nUnits = *nItems; 1976 1977 //MR CHANGE 1978 // if ( cscompare(datatype,"double") ) 1979 char* ts1 = StringStripper( datatype ); 1980 if ( cscompare("double",ts1) ) 1981 //MR CHANGE END 1982 { 1983 memcpy((PhastaIOActiveFiles[i]->double_chunk+DB_HEADER_SIZE/sizeof(double)), valueArray, nUnits*sizeof(double)); 1984 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 1985 PhastaIOActiveFiles[i]->my_offset, 1986 PhastaIOActiveFiles[i]->double_chunk, 1987 //BLOCK_SIZE/sizeof(double), 1988 nUnits+DB_HEADER_SIZE/sizeof(double), 1989 MPI_DOUBLE ); 1990 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 1991 PhastaIOActiveFiles[i]->double_chunk, 1992 &write_data_status ); 1993 data_size=8*nUnits; 1994 } 1995 //MR CHANGE 1996 // else if ( cscompare ( datatype, "integer")) 1997 else if ( cscompare("integer",ts1) ) 1998 //MR CHANGE END 1999 { 2000 memcpy((PhastaIOActiveFiles[i]->int_chunk+DB_HEADER_SIZE/sizeof(int)), valueArray, nUnits*sizeof(int)); 2001 MPI_File_write_at_all_begin( PhastaIOActiveFiles[i]->file_handle, 2002 PhastaIOActiveFiles[i]->my_offset, 2003 PhastaIOActiveFiles[i]->int_chunk, 2004 nUnits+DB_HEADER_SIZE/sizeof(int), 2005 MPI_INT ); 2006 MPI_File_write_at_all_end( PhastaIOActiveFiles[i]->file_handle, 2007 PhastaIOActiveFiles[i]->int_chunk, 2008 &write_data_status ); 2009 data_size=4*nUnits; 2010 } 2011 else { 2012 printf("Error: writedatablock - DATA_TYPE_ILLEGAL - %s\n",datatype); 2013 endTimer(&timer_end); 2014 printPerf("writedatablock", timer_start, timer_end, 0, 0, ""); 2015 return; 2016 } 2017 free(ts1); 2018 } 2019 2020 endTimer(&timer_end); 2021 char extra_msg[1024]; 2022 memset(extra_msg, '\0', 1024); 2023 char* key = StringStripper(keyphrase); 2024 sprintf(extra_msg, " field is %s ", key); 2025 printPerf("writedatablock", timer_start, timer_end, data_size, 1, extra_msg); 2026 free(key); 2027 2028 } 2029 2030 void 2031 SwapArrayByteOrder( void* array, 2032 int nbytes, 2033 int nItems ) 2034 { 2035 /* This swaps the byte order for the array of nItems each 2036 of size nbytes , This will be called only locally */ 2037 int i,j; 2038 unsigned char* ucDst = (unsigned char*)array; 2039 2040 for(i=0; i < nItems; i++) { 2041 for(j=0; j < (nbytes/2); j++) 2042 std::swap( ucDst[j] , ucDst[(nbytes - 1) - j] ); 2043 ucDst += nbytes; 2044 } 2045 } 2046 2047 void 2048 writestring( int* fileDescriptor, 2049 const char inString[] ) 2050 { 2051 2052 int filePtr = *fileDescriptor - 1; 2053 FILE* fileObject = fileArray[filePtr] ; 2054 fprintf(fileObject,"%s",inString ); 2055 return; 2056 } 2057 2058 void 2059 Gather_Headers( int* fileDescriptor, 2060 vector< string >& headers ) 2061 { 2062 2063 FILE* fileObject; 2064 char Line[1024]; 2065 2066 fileObject = fileArray[ (*fileDescriptor)-1 ]; 2067 2068 while( !feof(fileObject) ) { 2069 fgets( Line, 1024, fileObject); 2070 if ( Line[0] == '#' ) { 2071 headers.push_back( Line ); 2072 } else { 2073 break; 2074 } 2075 } 2076 rewind( fileObject ); 2077 clearerr( fileObject ); 2078 } 2079 void 2080 isWrong( void ) { (Wrong_Endian) ? fprintf(stdout,"YES\n"): fprintf(stdout,"NO\n") ; } 2081 2082 void 2083 togglestrictmode( void ) { Strict_Error = !Strict_Error; } 2084 2085 int 2086 isLittleEndian( void ) 2087 { 2088 // this function returns a 1 if the current running architecture is 2089 // LittleEndian Byte Ordered, else it returns a zero 2090 2091 union { 2092 long a; 2093 char c[sizeof( long )]; 2094 } endianUnion; 2095 2096 endianUnion.a = 1 ; 2097 2098 if ( endianUnion.c[sizeof(long)-1] != 1 ) return 1 ; 2099 else return 0; 2100 } 2101 2102 namespace PHASTA { 2103 const char* const PhastaIO_traits<int>::type_string = "integer"; 2104 const char* const PhastaIO_traits<double>::type_string = "double"; 2105 } 2106