1 /*$Id: sbaijov.c,v 1.65 2001/08/06 21:15:42 bsmith Exp $*/ 2 3 /* 4 Routines to compute overlapping regions of a parallel MPI matrix. 5 Used for finding submatrices that were shared across processors. 6 */ 7 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 8 #include "petscbt.h" 9 10 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat,int,IS *); 11 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat,int,char **,int*,int**); 12 static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat,int,int **,int**,int*); 13 14 /* this function is sasme as MatCompressIndicesGeneral_MPIBAIJ -- should be removed! */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatCompressIndicesGeneral_MPISBAIJ" 17 static int MatCompressIndicesGeneral_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[]) 18 { 19 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)C->data; 20 int ierr,isz,bs = baij->bs,n,i,j,*idx,ival; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 int tt, gid1, *nidx; 24 PetscTablePosition tpos; 25 #else 26 int Nbs,*nidx; 27 PetscBT table; 28 #endif 29 30 PetscFunctionBegin; 31 /* printf(" ...MatCompressIndicesGeneral_MPISBAIJ is called ...\n"); */ 32 #if defined (PETSC_USE_CTABLE) 33 ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr); 34 #else 35 Nbs = baij->Nbs; 36 ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 37 ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr); 38 #endif 39 for (i=0; i<imax; i++) { 40 isz = 0; 41 #if defined (PETSC_USE_CTABLE) 42 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 43 #else 44 ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr); 45 #endif 46 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 47 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 48 for (j=0; j<n ; j++) { 49 ival = idx[j]/bs; /* convert the indices into block indices */ 50 #if defined (PETSC_USE_CTABLE) 51 ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr); 52 if (!tt) { 53 ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1);CHKERRQ(ierr); 54 isz++; 55 } 56 #else 57 if (ival>Nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 58 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 59 #endif 60 } 61 ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); 62 #if defined (PETSC_USE_CTABLE) 63 ierr = PetscMalloc((isz+1)*sizeof(int),&nidx);CHKERRQ(ierr); 64 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 65 j = 0; 66 while (tpos) { 67 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr); 68 if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); } 69 nidx[tt] = gid1 - 1; 70 j++; 71 } 72 if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); } 73 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); 74 ierr = PetscFree(nidx);CHKERRQ(ierr); 75 #else 76 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); 77 #endif 78 } 79 #if defined (PETSC_USE_CTABLE) 80 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 81 #else 82 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 83 ierr = PetscFree(nidx);CHKERRQ(ierr); 84 #endif 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatCompressIndicesSorted_MPISBAIJ" 90 static int MatCompressIndicesSorted_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[]) 91 { 92 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)C->data; 93 int ierr,bs=baij->bs,i,j,k,val,n,*idx,*nidx,*idx_local; 94 PetscTruth flg; 95 #if defined (PETSC_USE_CTABLE) 96 int maxsz; 97 #else 98 int Nbs=baij->Nbs; 99 #endif 100 PetscFunctionBegin; 101 printf(" ... MatCompressIndicesSorted_MPISBAIJ is called ...\n"); 102 for (i=0; i<imax; i++) { 103 ierr = ISSorted(is_in[i],&flg);CHKERRQ(ierr); 104 if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Indices are not sorted"); 105 } 106 #if defined (PETSC_USE_CTABLE) 107 /* Now check max size */ 108 for (i=0,maxsz=0; i<imax; i++) { 109 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 110 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 111 if (n%bs !=0) SETERRQ(1,"Indices are not block ordered"); 112 n = n/bs; /* The reduced index size */ 113 if (n > maxsz) maxsz = n; 114 } 115 ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr); 116 #else 117 ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 118 #endif 119 /* Now check if the indices are in block order */ 120 for (i=0; i<imax; i++) { 121 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 122 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 123 if (n%bs !=0) SETERRQ(1,"Indices are not block ordered"); 124 125 n = n/bs; /* The reduced index size */ 126 idx_local = idx; 127 for (j=0; j<n ; j++) { 128 val = idx_local[0]; 129 if (val%bs != 0) SETERRQ(1,"Indices are not block ordered"); 130 for (k=0; k<bs; k++) { 131 if (val+k != idx_local[k]) SETERRQ(1,"Indices are not block ordered"); 132 } 133 nidx[j] = val/bs; 134 idx_local +=bs; 135 } 136 ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); 137 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,nidx,(is_out+i));CHKERRQ(ierr); 138 } 139 ierr = PetscFree(nidx);CHKERRQ(ierr); 140 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatExpandIndices_MPISBAIJ" 146 static int MatExpandIndices_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[]) 147 { 148 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)C->data; 149 int ierr,bs = baij->bs,n,i,j,k,*idx,*nidx; 150 #if defined (PETSC_USE_CTABLE) 151 int maxsz; 152 #else 153 int Nbs = baij->Nbs; 154 #endif 155 156 PetscFunctionBegin; 157 /* printf(" ... MatExpandIndices_MPISBAIJ is called ...\n"); */ 158 #if defined (PETSC_USE_CTABLE) 159 /* Now check max size */ 160 for (i=0,maxsz=0; i<imax; i++) { 161 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 162 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 163 if (n*bs > maxsz) maxsz = n*bs; 164 } 165 ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr); 166 #else 167 ierr = PetscMalloc((Nbs*bs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 168 #endif 169 170 for (i=0; i<imax; i++) { 171 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 172 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 173 for (j=0; j<n ; ++j){ 174 for (k=0; k<bs; k++) 175 nidx[j*bs+k] = idx[j]*bs+k; 176 } 177 ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); 178 ierr = ISCreateGeneral(PETSC_COMM_SELF,n*bs,nidx,is_out+i);CHKERRQ(ierr); 179 } 180 ierr = PetscFree(nidx);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ" 187 int MatIncreaseOverlap_MPISBAIJ(Mat C,int is_max,IS is[],int ov) 188 { 189 int i,ierr; 190 IS *is_new; 191 192 PetscFunctionBegin; 193 ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr); 194 /* Convert the indices into block format */ 195 ierr = MatCompressIndicesGeneral_MPISBAIJ(C,is_max,is,is_new);CHKERRQ(ierr); 196 if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");} 197 for (i=0; i<ov; ++i) { 198 ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr); 199 } 200 for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);} 201 ierr = MatExpandIndices_MPISBAIJ(C,is_max,is_new,is);CHKERRQ(ierr); 202 for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);} 203 ierr = PetscFree(is_new);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once" 209 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat C,int is_max,IS is[]) 210 { 211 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 212 int **idx,*n,len,*idx_i; 213 int size,rank,Mbs,i,j,k,ierr,**rbuf,nrqs,msz,*outdat,*indat; 214 int *onodes1,*olengths1,tag1,tag2,*onodes2,*olengths2,flag,proc_id; 215 MPI_Comm comm; 216 MPI_Request *s_waits1,*s_waits2,r_req; 217 MPI_Status *s_status,r_status; 218 219 PetscFunctionBegin; 220 221 comm = C->comm; 222 size = c->size; 223 rank = c->rank; 224 Mbs = c->Mbs; 225 226 /* 1. Send is[] to all other processors */ 227 /*--------------------------------------*/ 228 /* This processor sends its is[] to all other processors in the format: 229 outdat[0] = is_max, no of is in this processor 230 outdat[1] = n[0], size of is[0] 231 ... 232 outdat[is_max] = n[is_max-1], size of is[is_max-1] 233 outdat[is_max + 1] = data(is[0]) 234 ... 235 outdat[is_max + i] = data(is[i]) 236 ... 237 */ 238 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 239 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 240 printf(" [%d] tags: %d, %d\n",rank,tag1,tag2); 241 242 len = (is_max+1)*sizeof(int*)+ (is_max)*sizeof(int); 243 ierr = PetscMalloc(len,&idx);CHKERRQ(ierr); 244 n = (int*)(idx + is_max); 245 246 /* Allocate Memory for outgoing messages */ 247 len = 1 + is_max; 248 for (i=0; i<is_max; i++) { 249 ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr); 250 ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); 251 len += n[i]; 252 } 253 ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr); 254 255 /* Form the outgoing messages */ 256 outdat[0] = is_max; 257 for (i=0; i<is_max; i++) { 258 outdat[i+1] = n[i]; 259 } 260 k = is_max + 1; 261 for (i=0; i<is_max; i++) { /* for is[i] */ 262 idx_i = idx[i]; 263 for (j=0; j<n[i]; j++){ 264 outdat[k] = *(idx_i); 265 /* if (!rank) printf(" outdat[%d] = %d\n",k,outdat[k] ); */ 266 k++; idx_i++; 267 } 268 /* printf(" [%d] n[%d]=%d, k: %d, \n",rank,i,n[i],k); */ 269 } 270 if (k != len) SETERRQ3(1,"[%d] Error on forming the outgoing messages: k %d != len %d",rank,k,len); 271 272 /* Now post the sends */ 273 ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 274 275 k = 0; 276 for (proc_id=0; proc_id<size; ++proc_id) { /* send outdat to processor [proc_id] */ 277 if (proc_id != rank){ 278 ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr); 279 printf(" [%d] send %d msg to [%d] \n",rank,len,proc_id); 280 k++; 281 } 282 } 283 284 /* 2. Do local work */ 285 /*------------------*/ 286 Mat A = c->A, B = c->B; 287 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 288 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 289 int row,mbs, *nidx,*nidx_i,col,isz,isz0,*ai,*aj,bs,*bi,*bj,*garray,rstart,l; 290 int a_start,a_end,b_start,b_end; 291 PetscBT table; 292 PetscBT table0; 293 294 mbs = a->mbs; 295 bs = a->bs; 296 ai = a->i; 297 aj = a->j; 298 bi = b->i; 299 bj = b->j; 300 garray = c->garray; 301 rstart = c->rstart; 302 303 ierr = PetscBTCreate(Mbs,table);CHKERRQ(ierr); 304 ierr = PetscMalloc(is_max*Mbs*sizeof(int),&nidx);CHKERRQ(ierr); 305 ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr); 306 307 for (i=0; i<is_max; i++) { /* for each is */ 308 isz = 0; 309 ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr); 310 idx_i = idx[i]; 311 nidx_i = nidx+i*Mbs; /* holds new is[i] array */ 312 313 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 314 for (j=0; j<n[i]; j++){ 315 col = idx_i[j]; 316 if (col >= Mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"[%d] index col %d >= Mbs %d",rank,col,Mbs); 317 if(!PetscBTLookupSet(table,col)) { nidx_i[isz++] = col;} 318 } 319 320 k = 0; 321 /* set table0 for lookup */ 322 ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr); 323 for (l=k; l<isz; l++) PetscBTSet(table0,nidx_i[l]); 324 325 isz0 = isz; /* length of nidx_i[] before updating */ 326 for (row=0; row<mbs; row++){ 327 a_start = ai[row]; a_end = ai[row+1]; 328 b_start = bi[row]; b_end = bi[row+1]; 329 if (PetscBTLookup(table0,row+rstart)){ /* row is on nidx_i - row search: collect all col in this row */ 330 /* printf(" [%d] is[%d] row %d is on nidx_i\n",rank,i,row+rstart); */ 331 for (l = a_start; l<a_end ; l++){ /* Amat */ 332 col = aj[l] + rstart; 333 if (!PetscBTLookupSet(table,col)) {nidx_i[isz++] = col;} 334 } 335 for (l = b_start; l<b_end ; l++){ /* Bmat */ 336 col = garray[bj[l]]; 337 if (!PetscBTLookupSet(table,col)) {nidx_i[isz++] = col;} 338 } 339 k++; 340 if (k >= isz0) break; /* for (row=0; row<mbs; row++) */ 341 } else { /* row is not on nidx_i - col serach: add row onto nidx_i if there is a col in nidx_i */ 342 for (l = a_start; l<a_end ; l++){ /* Amat */ 343 col = aj[l] + rstart; 344 if (PetscBTLookup(table0,col)){ 345 if (!PetscBTLookupSet(table,row+rstart)) {nidx_i[isz++] = row+rstart;} 346 break; /* for l = start; l<end ; l++) */ 347 } 348 } 349 for (l = b_start; l<b_end ; l++){ /* Bmat */ 350 col = garray[bj[l]]; 351 if (PetscBTLookup(table0,col)){ 352 if (!PetscBTLookupSet(table,row+rstart)) {nidx_i[isz++] = row+rstart;} 353 break; /* for l = start; l<end ; l++) */ 354 } 355 } 356 } 357 } /* for (row=0; row<mbs; row++) */ 358 359 ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr); 360 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 361 n[i] = isz; 362 } /* /* for each is */ 363 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 364 ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 365 366 367 /* 3. Receive other's is[] and process. Then send back */ 368 /*----------------------------------------------------*/ 369 /* Send is done */ 370 nrqs = size-1; 371 ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); 372 ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr); 373 ierr = PetscFree(outdat);CHKERRQ(ierr); 374 ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 375 k = 0; 376 do { 377 /* Receive messages */ 378 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status); 379 if (flag){ 380 ierr = MPI_Get_count(&r_status,MPI_INT,&len); 381 proc_id = r_status.MPI_SOURCE; 382 ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr); 383 ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req); 384 printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id); 385 386 /* Process messages -- not done yet */ 387 len = indat[0]; 388 ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr); 389 for (i=0; i<len; i++){outdat[i] = indat[i+1];} 390 391 /* Send messages back */ 392 printf(" [%d] send %d msg back to [%d] \n",rank,len,proc_id); 393 ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag2,comm,&s_waits2[k]);CHKERRQ(ierr); 394 395 k++; 396 ierr = PetscFree(outdat);CHKERRQ(ierr); 397 ierr = PetscFree(indat);CHKERRQ(ierr); 398 } 399 } while (k < nrqs); 400 401 /* 4. Receive work done on other processors, then process */ 402 /*--------------------------------------------------------*/ 403 ierr = MPI_Waitall(nrqs,s_waits2,s_status);CHKERRQ(ierr); 404 k = 0; 405 do { 406 /* Receive messages */ 407 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); 408 if (flag){ 409 ierr = MPI_Get_count(&r_status,MPI_INT,&len); 410 proc_id = r_status.MPI_SOURCE; 411 ierr = ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr); 412 ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req); 413 printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id); 414 415 /* Process messages -- not done yet */ 416 417 418 k++; 419 ierr = PetscFree(indat);CHKERRQ(ierr); 420 } 421 } while (k < nrqs); 422 423 /* 5. Create new is[] */ 424 /*--------------------*/ 425 for (i=0; i<is_max; i++) { 426 nidx_i = nidx+i*Mbs; 427 ierr = ISCreateGeneral(PETSC_COMM_SELF,n[i],nidx_i,is+i);CHKERRQ(ierr); 428 } 429 ierr = PetscFree(nidx);CHKERRQ(ierr); 430 431 #ifdef OLD 432 ierr = PetscFree(onodes2);CHKERRQ(ierr); 433 ierr = PetscFree(olengths2);CHKERRQ(ierr); 434 ierr = PetscFree(rbuf2);CHKERRQ(ierr); 435 436 ierr = PetscFree(table);CHKERRQ(ierr); 437 ierr = PetscFree(s_status);CHKERRQ(ierr); 438 ierr = PetscFree(recv_status);CHKERRQ(ierr); 439 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 440 ierr = PetscFree(xdata);CHKERRQ(ierr); 441 ierr = PetscFree(isz1);CHKERRQ(ierr); 442 #endif /* OLD */ 443 ierr = PetscFree(idx);CHKERRQ(ierr); 444 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 445 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 446 ierr = PetscFree(s_status);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local" 452 /* 453 MatIncreaseOverlap_MPISBAIJ_Local - Called by MatincreaseOverlap, to do 454 the work on the local processor. 455 456 Inputs: 457 C - MAT_MPISBAIJ; 458 imax - total no of index sets processed at a time; 459 table - an array of char - size = Mbs bits. 460 461 Output: 462 isz - array containing the count of the solution elements correspondign 463 to each index set; 464 data - pointer to the solutions 465 */ 466 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat C,int imax,PetscBT *table,int *isz,int **data) 467 { 468 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 469 Mat A = c->A,B = c->B; 470 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 471 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 472 int start,end,val,max,rstart,cstart,*ai,*aj; 473 int *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; 474 PetscBT table_i; 475 476 PetscFunctionBegin; 477 rstart = c->rstart; 478 cstart = c->cstart; 479 ai = a->i; 480 aj = a->j; 481 bi = b->i; 482 bj = b->j; 483 garray = c->garray; 484 485 486 for (i=0; i<imax; i++) { 487 data_i = data[i]; 488 table_i = table[i]; 489 isz_i = isz[i]; 490 for (j=0,max=isz[i]; j<max; j++) { 491 row = data_i[j] - rstart; 492 start = ai[row]; 493 end = ai[row+1]; 494 for (k=start; k<end; k++) { /* Amat */ 495 val = aj[k] + cstart; 496 if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;} 497 } 498 start = bi[row]; 499 end = bi[row+1]; 500 for (k=start; k<end; k++) { /* Bmat */ 501 val = garray[bj[k]]; 502 if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;} 503 } 504 } 505 isz[i] = isz_i; 506 } 507 PetscFunctionReturn(0); 508 } 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Receive" 511 /* 512 MatIncreaseOverlap_MPISBAIJ_Receive - Process the recieved messages, 513 and return the output 514 515 Input: 516 C - the matrix 517 nrqr - no of messages being processed. 518 rbuf - an array of pointers to the recieved requests 519 520 Output: 521 xdata - array of messages to be sent back 522 isz1 - size of each message 523 524 For better efficiency perhaps we should malloc seperately each xdata[i], 525 then if a remalloc is required we need only copy the data for that one row 526 rather then all previous rows as it is now where a single large chunck of 527 memory is used. 528 529 */ 530 static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat C,int nrqr,int **rbuf,int **xdata,int * isz1) 531 { 532 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 533 Mat A = c->A,B = c->B; 534 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 535 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 536 int rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; 537 int row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; 538 int val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr; 539 int *rbuf_i,kmax,rbuf_0,ierr; 540 PetscBT xtable; 541 542 PetscFunctionBegin; 543 rank = c->rank; 544 Mbs = c->Mbs; 545 rstart = c->rstart; 546 cstart = c->cstart; 547 ai = a->i; 548 aj = a->j; 549 bi = b->i; 550 bj = b->j; 551 garray = c->garray; 552 553 554 for (i=0,ct=0,total_sz=0; i<nrqr; ++i) { 555 rbuf_i = rbuf[i]; 556 rbuf_0 = rbuf_i[0]; 557 ct += rbuf_0; 558 for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; } 559 } 560 561 if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs; 562 else max1 = 1; 563 mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); 564 ierr = PetscMalloc(mem_estimate*sizeof(int),&xdata[0]);CHKERRQ(ierr); 565 ++no_malloc; 566 ierr = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr); 567 ierr = PetscMemzero(isz1,nrqr*sizeof(int));CHKERRQ(ierr); 568 569 ct3 = 0; 570 for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */ 571 rbuf_i = rbuf[i]; 572 rbuf_0 = rbuf_i[0]; 573 ct1 = 2*rbuf_0+1; 574 ct2 = ct1; 575 ct3 += ct1; 576 for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/ 577 ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr); 578 oct2 = ct2; 579 kmax = rbuf_i[2*j]; 580 for (k=0; k<kmax; k++,ct1++) { 581 row = rbuf_i[ct1]; 582 if (!PetscBTLookupSet(xtable,row)) { 583 if (!(ct3 < mem_estimate)) { 584 new_estimate = (int)(1.5*mem_estimate)+1; 585 ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr); 586 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr); 587 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 588 xdata[0] = tmp; 589 mem_estimate = new_estimate; ++no_malloc; 590 for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 591 } 592 xdata[i][ct2++] = row; 593 ct3++; 594 } 595 } 596 for (k=oct2,max2=ct2; k<max2; k++) { 597 row = xdata[i][k] - rstart; 598 start = ai[row]; 599 end = ai[row+1]; 600 for (l=start; l<end; l++) { 601 val = aj[l] + cstart; 602 if (!PetscBTLookupSet(xtable,val)) { 603 if (!(ct3 < mem_estimate)) { 604 new_estimate = (int)(1.5*mem_estimate)+1; 605 ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr); 606 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr); 607 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 608 xdata[0] = tmp; 609 mem_estimate = new_estimate; ++no_malloc; 610 for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 611 } 612 xdata[i][ct2++] = val; 613 ct3++; 614 } 615 } 616 start = bi[row]; 617 end = bi[row+1]; 618 for (l=start; l<end; l++) { 619 val = garray[bj[l]]; 620 if (!PetscBTLookupSet(xtable,val)) { 621 if (!(ct3 < mem_estimate)) { 622 new_estimate = (int)(1.5*mem_estimate)+1; 623 ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr); 624 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr); 625 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 626 xdata[0] = tmp; 627 mem_estimate = new_estimate; ++no_malloc; 628 for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 629 } 630 xdata[i][ct2++] = val; 631 ct3++; 632 } 633 } 634 } 635 /* Update the header*/ 636 xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/ 637 xdata[i][2*j-1] = rbuf_i[2*j-1]; 638 } 639 xdata[i][0] = rbuf_0; 640 xdata[i+1] = xdata[i] + ct2; 641 isz1[i] = ct2; /* size of each message */ 642 } 643 ierr = PetscBTDestroy(xtable);CHKERRQ(ierr); 644 PetscLogInfo(0,"MatIncreaseOverlap_MPISBAIJ:[%d] Allocated %d bytes, required %d, no of mallocs = %d\n",rank,mem_estimate,ct3,no_malloc); 645 PetscFunctionReturn(0); 646 } 647 648 649