1 /*$Id: sbaijov.c,v 1.65 2001/08/06 21:15:42 bsmith Exp $*/ 2 3 /* 4 Routines to compute overlapping regions of a parallel MPI matrix. 5 Used for finding submatrices that were shared across processors. 6 */ 7 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 8 #include "petscbt.h" 9 10 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat,int,IS *); 11 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat,int *,int **,PetscBT*); 12 static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat,int,int **,int**,int*); 13 14 /* this function is sasme as MatCompressIndicesGeneral_MPIBAIJ -- should be removed! */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatCompressIndicesGeneral_MPISBAIJ" 17 static int MatCompressIndicesGeneral_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[]) 18 { 19 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)C->data; 20 int ierr,isz,bs = baij->bs,n,i,j,*idx,ival; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 int tt, gid1, *nidx; 24 PetscTablePosition tpos; 25 #else 26 int Nbs,*nidx; 27 PetscBT table; 28 #endif 29 30 PetscFunctionBegin; 31 /* printf(" ...MatCompressIndicesGeneral_MPISBAIJ is called ...\n"); */ 32 #if defined (PETSC_USE_CTABLE) 33 ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr); 34 #else 35 Nbs = baij->Nbs; 36 ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 37 ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr); 38 #endif 39 for (i=0; i<imax; i++) { 40 isz = 0; 41 #if defined (PETSC_USE_CTABLE) 42 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 43 #else 44 ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr); 45 #endif 46 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 47 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 48 for (j=0; j<n ; j++) { 49 ival = idx[j]/bs; /* convert the indices into block indices */ 50 #if defined (PETSC_USE_CTABLE) 51 ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr); 52 if (!tt) { 53 ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1);CHKERRQ(ierr); 54 isz++; 55 } 56 #else 57 if (ival>Nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 58 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 59 #endif 60 } 61 ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); 62 #if defined (PETSC_USE_CTABLE) 63 ierr = PetscMalloc((isz+1)*sizeof(int),&nidx);CHKERRQ(ierr); 64 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 65 j = 0; 66 while (tpos) { 67 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr); 68 if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); } 69 nidx[tt] = gid1 - 1; 70 j++; 71 } 72 if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); } 73 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); 74 ierr = PetscFree(nidx);CHKERRQ(ierr); 75 #else 76 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); 77 #endif 78 } 79 #if defined (PETSC_USE_CTABLE) 80 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 81 #else 82 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 83 ierr = PetscFree(nidx);CHKERRQ(ierr); 84 #endif 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatExpandIndices_MPISBAIJ" 90 static int MatExpandIndices_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[]) 91 { 92 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)C->data; 93 int ierr,bs = baij->bs,n,i,j,k,*idx,*nidx; 94 #if defined (PETSC_USE_CTABLE) 95 int maxsz; 96 #else 97 int Nbs = baij->Nbs; 98 #endif 99 100 PetscFunctionBegin; 101 /* printf(" ... MatExpandIndices_MPISBAIJ is called ...\n"); */ 102 #if defined (PETSC_USE_CTABLE) 103 /* Now check max size */ 104 for (i=0,maxsz=0; i<imax; i++) { 105 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 106 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 107 if (n*bs > maxsz) maxsz = n*bs; 108 } 109 ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr); 110 #else 111 ierr = PetscMalloc((Nbs*bs+1)*sizeof(int),&nidx);CHKERRQ(ierr); 112 #endif 113 114 for (i=0; i<imax; i++) { 115 ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr); 116 ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr); 117 for (j=0; j<n ; ++j){ 118 for (k=0; k<bs; k++) 119 nidx[j*bs+k] = idx[j]*bs+k; 120 } 121 ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); 122 ierr = ISCreateGeneral(PETSC_COMM_SELF,n*bs,nidx,is_out+i);CHKERRQ(ierr); 123 } 124 ierr = PetscFree(nidx);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 129 #undef __FUNCT__ 130 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ" 131 int MatIncreaseOverlap_MPISBAIJ(Mat C,int is_max,IS is[],int ov) 132 { 133 int i,ierr; 134 IS *is_new; 135 136 PetscFunctionBegin; 137 ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr); 138 /* Convert the indices into block format */ 139 ierr = MatCompressIndicesGeneral_MPISBAIJ(C,is_max,is,is_new);CHKERRQ(ierr); 140 if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");} 141 for (i=0; i<ov; ++i) { 142 ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr); 143 } 144 for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);} 145 ierr = MatExpandIndices_MPISBAIJ(C,is_max,is_new,is);CHKERRQ(ierr); 146 for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);} 147 ierr = PetscFree(is_new);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once" 153 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat C,int is_max,IS is[]) 154 { 155 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 156 int **idx,*n,len,*idx_i,*nidx,*nidx_i,isz,col; 157 int size,rank,Mbs,i,j,k,ierr,nrqs,*outdat,*indat; 158 int tag1,tag2,flag,proc_id; 159 MPI_Comm comm; 160 MPI_Request *s_waits1,*s_waits2,r_req; 161 MPI_Status *s_status,r_status; 162 PetscBT *table; 163 PetscBT table_i; 164 PetscBT *table_tmp; 165 166 PetscFunctionBegin; 167 168 comm = C->comm; 169 size = c->size; 170 rank = c->rank; 171 Mbs = c->Mbs; 172 173 /* int prid=0; */ 174 175 len = is_max*sizeof(PetscBT) + (Mbs/PETSC_BITS_PER_BYTE+1)*is_max*sizeof(char) + 1; 176 ierr = PetscMalloc(len,&table);CHKERRQ(ierr); 177 char *t_p; 178 t_p = (char *)(table + is_max); 179 for (i=0; i<is_max; i++) { 180 table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 181 } 182 183 ierr = PetscMalloc(len,&table_tmp);CHKERRQ(ierr); 184 t_p = (char *)(table_tmp + is_max); 185 for (i=0; i<is_max; i++) { 186 table_tmp[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 187 } 188 189 /* 1. Send is[] to all other processors */ 190 /*--------------------------------------*/ 191 /* This processor sends its is[] to all other processors in the format: 192 outdat[0] = is_max, no of is in this processor 193 outdat[1] = n[0], size of is[0] 194 ... 195 outdat[is_max] = n[is_max-1], size of is[is_max-1] 196 outdat[is_max + 1] = data(is[0]) 197 ... 198 outdat[is_max+1+sum(n[k]), k=0,...,i-1] = data(is[i]) 199 ... 200 */ 201 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 202 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 203 204 len = (is_max+1)*sizeof(int*)+ (is_max)*sizeof(int); 205 ierr = PetscMalloc(len,&idx);CHKERRQ(ierr); 206 n = (int*)(idx + is_max); 207 208 /* Allocate Memory for outgoing messages */ 209 len = 1 + is_max; 210 for (i=0; i<is_max; i++) { 211 ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr); 212 ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); 213 len += n[i]; 214 } 215 ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr); 216 217 /* Form the outgoing messages */ 218 outdat[0] = is_max; 219 for (i=0; i<is_max; i++) { 220 outdat[i+1] = n[i]; 221 } 222 k = is_max + 1; 223 for (i=0; i<is_max; i++) { /* for is[i] */ 224 idx_i = idx[i]; 225 for (j=0; j<n[i]; j++){ 226 outdat[k] = *(idx_i); 227 k++; idx_i++; 228 } 229 } 230 if (k != len) SETERRQ2(1,"Error on forming the outgoing messages: k %d != len %d",k,len); 231 232 /* Now post the sends */ 233 ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 234 235 k = 0; 236 for (proc_id=0; proc_id<size; ++proc_id) { /* send outdat to processor [proc_id] */ 237 if (proc_id != rank){ 238 ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr); 239 /* printf(" [%d] send %d msg to [%d] \n",rank,len,proc_id); */ 240 k++; 241 } 242 } 243 244 /* 2. Do local work on this processor's is[] */ 245 /*-------------------------------------------*/ 246 ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,outdat,&nidx,table);CHKERRQ(ierr); 247 248 for (i=0; i<is_max; i++){ 249 ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr); 250 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 251 } 252 253 /* 3. Receive other's is[] and process. Then send back */ 254 /*----------------------------------------------------*/ 255 /* Send is done */ 256 nrqs = size-1; 257 ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); 258 ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr); 259 260 /* save n[i] */ 261 for (i=0; i<is_max; i++){ 262 n[i] = outdat[1+i]; 263 } 264 ierr = PetscFree(outdat);CHKERRQ(ierr); 265 ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 266 int **outdat_ptr; 267 ierr = PetscMalloc(size*sizeof(int**),&outdat_ptr); 268 k = 0; 269 do { 270 /* Receive messages */ 271 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status); 272 if (flag){ 273 ierr = MPI_Get_count(&r_status,MPI_INT,&len); 274 proc_id = r_status.MPI_SOURCE; 275 ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr); 276 ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req); 277 /* printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id); */ 278 279 /* Process messages */ 280 ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,indat,&outdat_ptr[k],table_tmp);CHKERRQ(ierr); 281 outdat = outdat_ptr[k]; 282 len = 1 + outdat[0]; 283 for (i=0; i<outdat[0]; i++){ 284 len += outdat[1 + i]; 285 } 286 287 /* Send messages back */ 288 /* printf(" [%d] send %d msg back to [%d] \n",rank,len,proc_id); */ 289 ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag2,comm,&s_waits2[k]);CHKERRQ(ierr); 290 291 ierr = PetscFree(indat);CHKERRQ(ierr); 292 k++; 293 } 294 } while (k < nrqs); 295 296 /* 4. Receive work done on other processors, then merge */ 297 /*--------------------------------------------------------*/ 298 ierr = MPI_Waitall(nrqs,s_waits2,s_status);CHKERRQ(ierr); 299 for (k=0; k<nrqs; k++){ 300 ierr = PetscFree(outdat_ptr[k]);CHKERRQ(ierr); 301 } 302 ierr = PetscFree(outdat_ptr);CHKERRQ(ierr); 303 304 /* allocate memory for merged data */ 305 int *mydata,*mydata_i; 306 ierr = PetscMalloc((1+is_max*(Mbs+1))*sizeof(int),&mydata);CHKERRQ(ierr); 307 308 /* copy nidx into mydata */ 309 k = is_max + 1; 310 for (i=0; i<is_max; i++){ 311 mydata[1+i] = nidx[1+i]; /* size of is[i] before merge */ 312 mydata_i = mydata + 1 + is_max + Mbs*i; 313 for (j=0; j<nidx[1+i]; j++){ 314 mydata_i[j] = nidx[k++]; 315 } 316 } 317 318 k = 0; 319 do { 320 /* Receive messages */ 321 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); 322 if (flag){ 323 ierr = MPI_Get_count(&r_status,MPI_INT,&len); 324 proc_id = r_status.MPI_SOURCE; 325 ierr = ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr); 326 ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req); 327 /* printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id); */ 328 329 /* merge indat into mydata */ 330 nidx_i = indat + 1 + is_max; 331 for (i=0; i<is_max; i++){ 332 table_i = table[i]; 333 mydata_i = mydata + 1 + is_max + Mbs*i; 334 isz = mydata[1+i]; /* size of is[i] from nidx */ 335 336 for (j=0; j<indat[1+i]; j++){ 337 col = nidx_i[j]; 338 if (!PetscBTLookupSet(table_i,col)) {mydata_i[isz++] = col;} 339 } 340 mydata[1+i] = isz; 341 if (i < is_max - 1){ 342 nidx_i += indat[1+i]; /* ptr to is[i+1] array from indat */ 343 } 344 } /* for (i=0; i<is_max; i++) */ 345 346 k++; 347 ierr = PetscFree(indat);CHKERRQ(ierr); 348 } 349 } while (k < nrqs); 350 351 /* 5. Create new is[] */ 352 /*--------------------*/ 353 for (i=0; i<is_max; i++) { 354 mydata_i = mydata + 1 + is_max + Mbs*i; 355 ierr = ISCreateGeneral(PETSC_COMM_SELF,mydata[1+i],mydata_i,is+i);CHKERRQ(ierr); 356 } 357 358 ierr = PetscFree(mydata);CHKERRQ(ierr); 359 ierr = PetscFree(nidx);CHKERRQ(ierr); 360 ierr = PetscFree(idx);CHKERRQ(ierr); 361 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 362 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 363 ierr = PetscFree(s_status);CHKERRQ(ierr); 364 ierr = PetscFree(table);CHKERRQ(ierr); 365 ierr = PetscFree(table_tmp);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local" 371 /* 372 MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap, to do 373 the work on the local processor. 374 375 Inputs: 376 C - MAT_MPISBAIJ; 377 data - holds is[] in the format: 378 data[0] = is_max, no of is 379 data[1] = size of is[0] 380 ... 381 data[is_max] = size of is[is_max-1] 382 data[is_max + 1] = is[0] array 383 ... 384 data[is_max+1+sum(n[k]), k=0,...,i-1] = is[i] array 385 ... 386 387 Output: 388 data_new - holds new is[] in the same format as data 389 table - table[i]: mark the indices of is[i], i=0,...,is_max. 390 */ 391 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat C,int *data,int **data_new,PetscBT *table) 392 { 393 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 394 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data; 395 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(c->B)->data; 396 int ierr,row,mbs,Mbs,*nidx,*nidx_i,col,isz,isz0,*ai,*aj,bs,*bi,*bj,*garray,rstart,l; 397 int a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n; 398 PetscBT table0; 399 PetscBT table_i; /* poits to i-th table */ 400 401 PetscFunctionBegin; 402 Mbs = c->Mbs; mbs = a->mbs; bs = a->bs; 403 ai = a->i; aj = a->j; 404 bi = b->i; bj = b->j; 405 garray = c->garray; 406 rstart = c->rstart; 407 is_max = data[0]; 408 409 /* int rank=c->rank,prid=0; */ /* for debugging */ 410 411 ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr); 412 413 ierr = PetscMalloc((1+is_max*(Mbs+1))*sizeof(int),&nidx);CHKERRQ(ierr); 414 nidx[0] = is_max; 415 416 idx_i = data + is_max + 1; /* ptr to input is[0] array */ 417 nidx_i = nidx + is_max + 1; /* ptr to active is[0] array */ 418 for (i=0; i<is_max; i++) { /* for each is */ 419 isz = 0; 420 table_i = table[i]; 421 ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr); 422 n = data[1+i]; /* size of input is[i] */ 423 424 if (n > 0) { 425 426 /* Enter input is[i] into active is[i] */ 427 for (j=0; j<n; j++){ 428 col = idx_i[j]; 429 if (col >= Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"index col %d >= Mbs %d",col,Mbs); 430 if(!PetscBTLookupSet(table_i,col)) { nidx_i[isz++] = col;} 431 } 432 433 /* set table0 for lookup */ 434 ierr = PetscBTMemzero(Mbs,table0);CHKERRQ(ierr); 435 for (l=0; l<isz; l++) PetscBTSet(table0,nidx_i[l]); 436 437 isz0 = isz; /* size of input is[i] after removing repeated indices */ 438 k = 0; /* no. of indices from input is[i] that have been examined */ 439 for (row=0; row<mbs; row++){ 440 a_start = ai[row]; a_end = ai[row+1]; 441 b_start = bi[row]; b_end = bi[row+1]; 442 if (PetscBTLookup(table0,row+rstart)){ /* row is on nidx_i - row search: collect all col in this row */ 443 for (l = a_start; l<a_end ; l++){ /* Amat */ 444 col = aj[l] + rstart; 445 if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;} 446 } 447 for (l = b_start; l<b_end ; l++){ /* Bmat */ 448 col = garray[bj[l]]; 449 if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;} 450 } 451 k++; 452 if (k >= isz0) break; /* for (row=0; row<mbs; row++) */ 453 } else { /* row is not on nidx_i - col serach: add row onto nidx_i if there is a col in nidx_i */ 454 for (l = a_start; l<a_end ; l++){ /* Amat */ 455 col = aj[l] + rstart; 456 if (PetscBTLookup(table0,col)){ 457 if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} 458 break; /* for l = start; l<end ; l++) */ 459 } 460 } 461 for (l = b_start; l<b_end ; l++){ /* Bmat */ 462 col = garray[bj[l]]; 463 if (PetscBTLookup(table0,col)){ 464 if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} 465 break; /* for l = start; l<end ; l++) */ 466 } 467 } 468 } 469 } /* for (row=0; row<mbs; row++) */ 470 } /* if (n > 0) */ 471 472 if (i < is_max - 1){ 473 idx_i += n; /* ptr to input is[i+1] array */ 474 nidx_i += isz; /* ptr to active is[i+1] array */ 475 } 476 nidx[1+i] = isz; /* size of new is[i] */ 477 } /* /* for each is */ 478 *data_new = nidx; 479 ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 480 481 PetscFunctionReturn(0); 482 } 483 484 485