/*
   Routines to compute overlapping regions of a parallel MPI matrix
   and to find submatrices that are shared across processors.
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petscbt.h>

static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**);
static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*);
extern PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);

#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ"
PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  PetscInt       i,N=C->cmap->N,bs=C->rmap->bs;
  IS             *is_new;

  PetscFunctionBegin;
  ierr = PetscMalloc(imax*sizeof(IS),&is_new);CHKERRQ(ierr);
  /* Convert the indices into block format */
  ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,imax,is,is_new);CHKERRQ(ierr);
  if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
  for (i=0; i<ov; ++i) {
    ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr);
  }
  for (i=0; i<imax; i++) {ierr = ISDestroy(&is[i]);CHKERRQ(ierr);}
  ierr = ISExpandIndicesGeneral(N,N,bs,imax,is_new,is);CHKERRQ(ierr);
  for (i=0; i<imax; i++) {ierr = ISDestroy(&is_new[i]);CHKERRQ(ierr);}
  ierr = PetscFree(is_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Sample message format:
  If a processor A wants processor B to process some elements corresponding
  to index sets is[1] and is[5]:
  mesg [0] = 2    (number of index sets in the message)
  -----------
  mesg [1] = 1 => is[1]
  mesg [2] = sizeof(is[1]);
  -----------
  mesg [3] = 5 => is[5]
  mesg [4] = sizeof(is[5]);
  -----------
  mesg [5]
  mesg [n]   data(is[1])
  -----------
  mesg[n+1]
  mesg[m]    data(is[5])
  -----------

  Notes:
  nrqs - number of requests sent (or to be sent out)
  nrqr - number of requests received (which have to be or have been processed)
*/
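/*
  Illustrative only: a minimal sketch (not part of PETSc) of how a request
  message laid out as above could be unpacked on the receiving side.  The
  helper name and the assumption that the payload blocks appear in the same
  order as the header entries are ours.
*/
#if 0
static void UnpackOverlapRequest_Sketch(const PetscInt *mesg)
{
  PetscInt       nis   = mesg[0];            /* number of index sets in the message */
  const PetscInt *hdr  = mesg + 1;           /* header: (is id, length) pairs       */
  const PetscInt *data = mesg + 1 + 2*nis;   /* payload follows the header          */
  PetscInt       i,j;

  for (i=0; i<nis; i++) {
    PetscInt is_id = hdr[2*i];               /* which index set this block belongs to */
    PetscInt len   = hdr[2*i+1];             /* number of block rows sent for it      */
    for (j=0; j<len; j++) {
      PetscInt row = data[j];                /* a requested block row                  */
      (void)is_id; (void)row;                /* ... process (is_id,row) here ...       */
    }
    data += len;
  }
}
#endif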
#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Once"
PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[])
{
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  const PetscInt **idx,*idx_i;
  PetscInt       *n,*w3,*w4,**data,len;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*w2,*w1,nrqr;
  PetscInt       Mbs,i,j,k,**rbuf,row,proc=-1,nrqs,msz,**outdat,**ptr;
  PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
  PetscMPIInt    *onodes1,*olengths1,*onodes2,*olengths2;
  PetscBT        *table;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *s_status,*recv_status;
  char           *t_p;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);

  ierr = PetscMalloc2(imax+1,&idx,imax,&n);CHKERRQ(ierr);

  for (i=0; i<imax; i++) {
    ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
  }

  /* evaluate communication - mesg to who, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them */
  ierr = PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<imax; i++) {
    ierr  = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector */
    idx_i = idx[i];
    len   = n[i];
    for (j=0; j<len; j++) {
      row = idx_i[j];
      if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
      ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr);
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
    }
  }

  nrqs     = 0; /* number of outgoing messages */
  msz      = 0; /* total message length (for all procs) */
  w1[rank] = 0; /* no message sent to itself */
  w3[rank] = 0;
  for (i=0; i<size; i++) {
    if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
  }
  /* pa - list of processors to communicate with */
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr);
  for (i=0,j=0; i<size; i++) {
    if (w1[i]) {pa[j] = i; j++;}
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j      = pa[i];
    w1[j] += w2[j] + 2*w3[j];
    msz   += w1[j];
  }

  /* Determine the number of messages to expect, their lengths, and from-ids */
  ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr);

  /* Now post the Irecvs corresponding to these messages */
  ierr = PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);CHKERRQ(ierr);

  /* Allocate memory for outgoing messages */
  ierr = PetscMalloc4(size,&outdat,size,&ptr,msz,&tmp,size,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(outdat,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j         = pa[i];
      iptr     += ict;
      outdat[j] = iptr;
      ict       = w1[j];
    }
  }

  /* Form the outgoing messages */
  /* plug in the headers */
  for (i=0; i<nrqs; i++) {
    j            = pa[i];
    outdat[j][0] = 0;
    ierr         = PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]       = outdat[j] + 2*w3[j] + 1;
  }

  /* Memory for doing local proc's work */
  {
    ierr = PetscMalloc5(imax,&table, imax,&data, imax,&isz, Mbs*imax,&d_p, (Mbs/PETSC_BITS_PER_BYTE+1)*imax,&t_p);CHKERRQ(ierr);
    ierr = PetscMemzero(table,imax*sizeof(PetscBT));CHKERRQ(ierr);
    ierr = PetscMemzero(data,imax*sizeof(PetscInt*));CHKERRQ(ierr);
    ierr = PetscMemzero(isz,imax*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemzero(d_p,Mbs*imax*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemzero(t_p,(Mbs/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));CHKERRQ(ierr);

    for (i=0; i<imax; i++) {
      table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
      data[i]  = d_p + (Mbs)*i;
    }
  }

  /* Parse the IS and update local tables and the outgoing buf with the data */
  {
    PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
    PetscBT  table_i;

    for (i=0; i<imax; i++) {
      ierr    = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
      n_i     = n[i];
      table_i = table[i];
      idx_i   =
idx[i]; 190 data_i = data[i]; 191 isz_i = isz[i]; 192 for (j=0; j<n_i; j++) { /* parse the indices of each IS */ 193 row = idx_i[j]; 194 ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); 195 if (proc != rank) { /* copy to the outgoing buffer */ 196 ctr[proc]++; 197 *ptr[proc] = row; 198 ptr[proc]++; 199 } else { /* Update the local table */ 200 if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; 201 } 202 } 203 /* Update the headers for the current IS */ 204 for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */ 205 if ((ctr_j = ctr[j])) { 206 outdat_j = outdat[j]; 207 k = ++outdat_j[0]; 208 outdat_j[2*k] = ctr_j; 209 outdat_j[2*k-1] = i; 210 } 211 } 212 isz[i] = isz_i; 213 } 214 } 215 216 /* Now post the sends */ 217 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 218 for (i=0; i<nrqs; ++i) { 219 j = pa[i]; 220 ierr = MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);CHKERRQ(ierr); 221 } 222 223 /* No longer need the original indices*/ 224 for (i=0; i<imax; ++i) { 225 ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr); 226 } 227 ierr = PetscFree2(idx,n);CHKERRQ(ierr); 228 229 for (i=0; i<imax; ++i) { 230 ierr = ISDestroy(&is[i]);CHKERRQ(ierr); 231 } 232 233 /* Do Local work*/ 234 ierr = MatIncreaseOverlap_MPIBAIJ_Local(C,imax,table,isz,data);CHKERRQ(ierr); 235 236 /* Receive messages*/ 237 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);CHKERRQ(ierr); 238 if (nrqr) {ierr = MPI_Waitall(nrqr,r_waits1,recv_status);CHKERRQ(ierr);} 239 240 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); 241 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);} 242 243 /* Phase 1 sends are complete - deallocate buffers */ 244 ierr = PetscFree4(outdat,ptr,tmp,ctr);CHKERRQ(ierr); 245 ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 246 247 ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);CHKERRQ(ierr); 248 ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);CHKERRQ(ierr); 249 ierr = MatIncreaseOverlap_MPIBAIJ_Receive(C,nrqr,rbuf,xdata,isz1);CHKERRQ(ierr); 250 ierr = PetscFree(rbuf[0]);CHKERRQ(ierr); 251 ierr = PetscFree(rbuf);CHKERRQ(ierr); 252 253 /* Send the data back*/ 254 /* Do a global reduction to know the buffer space req for incoming messages*/ 255 { 256 PetscMPIInt *rw1; 257 258 ierr = PetscMalloc(size*sizeof(PetscInt),&rw1);CHKERRQ(ierr); 259 ierr = PetscMemzero(rw1,size*sizeof(PetscInt));CHKERRQ(ierr); 260 261 for (i=0; i<nrqr; ++i) { 262 proc = recv_status[i].MPI_SOURCE; 263 if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch"); 264 rw1[proc] = isz1[i]; 265 } 266 267 ierr = PetscFree(onodes1);CHKERRQ(ierr); 268 ierr = PetscFree(olengths1);CHKERRQ(ierr); 269 270 /* Determine the number of messages to expect, their lengths, from from-ids */ 271 ierr = PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);CHKERRQ(ierr); 272 ierr = PetscFree(rw1);CHKERRQ(ierr); 273 } 274 /* Now post the Irecvs corresponding to these messages */ 275 ierr = PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);CHKERRQ(ierr); 276 277 /* Now post the sends */ 278 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 279 for (i=0; i<nrqr; ++i) { 280 j = recv_status[i].MPI_SOURCE; 281 ierr = MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);CHKERRQ(ierr); 282 } 283 284 /* receive work done on other processors*/ 285 { 286 PetscMPIInt idex; 287 PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax; 288 PetscBT 
table_i;
    MPI_Status  *status2;

    ierr = PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);CHKERRQ(ierr);
    for (i=0; i<nrqs; ++i) {
      ierr = MPI_Waitany(nrqs,r_waits2,&idex,status2+i);CHKERRQ(ierr);
      /* Process the message */
      rbuf2_i = rbuf2[idex];
      ct1     = 2*rbuf2_i[0]+1;
      jmax    = rbuf2[idex][0];
      for (j=1; j<=jmax; j++) {
        max     = rbuf2_i[2*j];
        is_no   = rbuf2_i[2*j-1];
        isz_i   = isz[is_no];
        data_i  = data[is_no];
        table_i = table[is_no];
        for (k=0; k<max; k++,ct1++) {
          row = rbuf2_i[ct1];
          if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row;
        }
        isz[is_no] = isz_i;
      }
    }
    if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,status2);CHKERRQ(ierr);}
    ierr = PetscFree(status2);CHKERRQ(ierr);
  }

  for (i=0; i<imax; ++i) {
    ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
  }

  ierr = PetscFree(onodes2);CHKERRQ(ierr);
  ierr = PetscFree(olengths2);CHKERRQ(ierr);

  ierr = PetscFree(pa);CHKERRQ(ierr);
  ierr = PetscFree(rbuf2[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscFree5(table,data,isz,d_p,t_p);CHKERRQ(ierr);
  ierr = PetscFree(s_status);CHKERRQ(ierr);
  ierr = PetscFree(recv_status);CHKERRQ(ierr);
  ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
  ierr = PetscFree(xdata);CHKERRQ(ierr);
  ierr = PetscFree(isz1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
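/*
  Illustrative only: a minimal sketch of the "expand by one level" step that
  MatIncreaseOverlap_MPIBAIJ_Local below performs for locally owned rows.
  Here ai/aj stand for the block CSR structure of one sequential piece of the
  matrix and the PetscBT bitmap guarantees each block column is appended at
  most once; the helper name is ours, and the real routine's handling of the
  diagonal/off-diagonal parts and its error checking are omitted.
*/
#if 0
static void ExpandRowsOnce_Sketch(const PetscInt *ai,const PetscInt *aj,PetscBT seen,PetscInt *rows,PetscInt *nrows)
{
  PetscInt j,k,n = *nrows;

  for (j=0; j<n; j++) {                       /* every row already in the set  */
    PetscInt row = rows[j];
    for (k=ai[row]; k<ai[row+1]; k++) {       /* all block columns of that row */
      PetscInt col = aj[k];
      if (!PetscBTLookupSet(seen,col)) rows[(*nrows)++] = col; /* add once     */
    }
  }
}
#endif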

#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Local"
/*
  MatIncreaseOverlap_MPIBAIJ_Local - Called by MatIncreaseOverlap, to do
  the work on the local processor.

  Inputs:
    C     - MAT_MPIBAIJ;
    imax  - total no of index sets processed at a time;
    table - an array of char - size = Mbs bits.

  Output:
    isz   - array containing the count of the solution elements corresponding
            to each index set;
    data  - pointer to the solutions
*/
static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
{
  Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
  Mat         A  = c->A,B = c->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
  PetscInt    start,end,val,max,rstart,cstart,*ai,*aj;
  PetscInt    *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
  PetscBT     table_i;

  PetscFunctionBegin;
  rstart = c->rstartbs;
  cstart = c->cstartbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;

  for (i=0; i<imax; i++) {
    data_i  = data[i];
    table_i = table[i];
    isz_i   = isz[i];
    for (j=0,max=isz[i]; j<max; j++) {
      row   = data_i[j] - rstart;
      start = ai[row];
      end   = ai[row+1];
      for (k=start; k<end; k++) { /* Amat */
        val = aj[k] + cstart;
        if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
      }
      start = bi[row];
      end   = bi[row+1];
      for (k=start; k<end; k++) { /* Bmat */
        val = garray[bj[k]];
        if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
      }
    }
    isz[i] = isz_i;
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Receive"
/*
  MatIncreaseOverlap_MPIBAIJ_Receive - Process the received messages,
  and return the output

  Input:
    C    - the matrix
    nrqr - number of messages being processed.
    rbuf - an array of pointers to the received requests

  Output:
    xdata - array of messages to be sent back
    isz1  - size of each message

  For better efficiency perhaps we should malloc separately each xdata[i],
  then if a realloc is required we need only copy the data for that one row
  rather than all previous rows as it is now, where a single large chunk of
  memory is used.

*/
static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt *isz1)
{
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  Mat            A  = c->A,B = c->B;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
  PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
  PetscInt       val,max1,max2,Mbs,no_malloc=0,*tmp,new_estimate,ctr;
  PetscInt       *rbuf_i,kmax,rbuf_0;
  PetscBT        xtable;

  PetscFunctionBegin;
  Mbs    = c->Mbs;
  rstart = c->rstartbs;
  cstart = c->cstartbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;

  for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
    rbuf_i = rbuf[i];
    rbuf_0 = rbuf_i[0];
    ct    += rbuf_0;
    for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j];
  }

  if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs;
  else        max1 = 1;
  mem_estimate = 3*((total_sz > max1 ?
total_sz : max1)+1); 451 ierr = PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);CHKERRQ(ierr); 452 ++no_malloc; 453 ierr = PetscBTCreate(Mbs,&xtable);CHKERRQ(ierr); 454 ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr); 455 456 ct3 = 0; 457 for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */ 458 rbuf_i = rbuf[i]; 459 rbuf_0 = rbuf_i[0]; 460 ct1 = 2*rbuf_0+1; 461 ct2 = ct1; 462 ct3 += ct1; 463 for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/ 464 ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr); 465 oct2 = ct2; 466 kmax = rbuf_i[2*j]; 467 for (k=0; k<kmax; k++,ct1++) { 468 row = rbuf_i[ct1]; 469 if (!PetscBTLookupSet(xtable,row)) { 470 if (!(ct3 < mem_estimate)) { 471 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 472 ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 473 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 474 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 475 xdata[0] = tmp; 476 mem_estimate = new_estimate; ++no_malloc; 477 for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 478 } 479 xdata[i][ct2++] = row; 480 ct3++; 481 } 482 } 483 for (k=oct2,max2=ct2; k<max2; k++) { 484 row = xdata[i][k] - rstart; 485 start = ai[row]; 486 end = ai[row+1]; 487 for (l=start; l<end; l++) { 488 val = aj[l] + cstart; 489 if (!PetscBTLookupSet(xtable,val)) { 490 if (!(ct3 < mem_estimate)) { 491 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 492 ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 493 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 494 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 495 xdata[0] = tmp; 496 mem_estimate = new_estimate; ++no_malloc; 497 for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 498 } 499 xdata[i][ct2++] = val; 500 ct3++; 501 } 502 } 503 start = bi[row]; 504 end = bi[row+1]; 505 for (l=start; l<end; l++) { 506 val = garray[bj[l]]; 507 if (!PetscBTLookupSet(xtable,val)) { 508 if (!(ct3 < mem_estimate)) { 509 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 510 ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr); 511 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 512 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 513 xdata[0] = tmp; 514 mem_estimate = new_estimate; ++no_malloc; 515 for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 516 } 517 xdata[i][ct2++] = val; 518 ct3++; 519 } 520 } 521 } 522 /* Update the header*/ 523 xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/ 524 xdata[i][2*j-1] = rbuf_i[2*j-1]; 525 } 526 xdata[i][0] = rbuf_0; 527 xdata[i+1] = xdata[i] + ct2; 528 isz1[i] = ct2; /* size of each message */ 529 } 530 ierr = PetscBTDestroy(&xtable);CHKERRQ(ierr); 531 ierr = PetscInfo3(C,"Allocated %D bytes, required %D, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);CHKERRQ(ierr); 532 PetscFunctionReturn(0); 533 } 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ" 537 PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) 538 { 539 IS *isrow_new,*iscol_new; 540 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 541 PetscErrorCode ierr; 542 PetscInt nmax,nstages_local,nstages,i,pos,max_no,ncol,nrow,N=C->cmap->N,bs=C->rmap->bs; 543 PetscBool colflag,*allcolumns,*allrows; 544 545 PetscFunctionBegin; 546 /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. 
*/
  for (i = 0; i < ismax; ++i) {
    PetscBool sorted;
    ierr = ISSorted(iscol[i], &sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ1(((PetscObject)iscol[i])->comm, PETSC_ERR_SUP, "Column index set %D not sorted", i);
  }
  /* The index-set compression and expansion should be avoided: it does not
     report errors and may change the indices, hence it is bug-prone */
  ierr = PetscMalloc2(ismax+1,&isrow_new,ismax+1,&iscol_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,ismax,isrow,isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(N,C->cmap->n,bs,ismax,iscol,iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = PetscMalloc2(ismax+1,&allcolumns,ismax+1,&allrows);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISIdentity(iscol[i],&colflag);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol);CHKERRQ(ierr);
    if (colflag && ncol == C->cmap->N) allcolumns[i] = PETSC_TRUE;
    else allcolumns[i] = PETSC_FALSE;

    ierr = ISIdentity(isrow[i],&colflag);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow);CHKERRQ(ierr);
    if (colflag && nrow == C->rmap->N) allrows[i] = PETSC_TRUE;
    else allrows[i] = PETSC_FALSE;
  }

  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr);
  }
  /* Determine the number of stages through which submatrices are done */
  nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

  /* Make sure every processor loops through the nstages */
  ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr);
  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,isrow_new+pos,iscol_new+pos,scall,allrows+pos,allcolumns+pos,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }

  for (i=0; i<ismax; i++) {
    ierr = ISDestroy(&isrow_new[i]);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_new[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree2(isrow_new,iscol_new);CHKERRQ(ierr);
  ierr = PetscFree2(allcolumns,allrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
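/*
  Illustrative only: ISCompressIndicesGeneral() above converts point indices
  into block indices for block size bs.  A minimal sketch of that conversion
  for one sorted index set, assuming a block is selected as soon as any of
  its points appears (helper name and signature are ours):
*/
#if 0
static void CompressToBlocks_Sketch(PetscInt bs,PetscInt n,const PetscInt *pidx,PetscInt *nb,PetscInt *bidx)
{
  PetscInt i,last = -1,cnt = 0;

  for (i=0; i<n; i++) {
    PetscInt blk = pidx[i]/bs;            /* block that owns this point index */
    if (blk != last) bidx[cnt++] = blk;   /* record each block only once      */
    last = blk;
  }
  *nb = cnt;
}
#endif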

#if defined(PETSC_USE_CTABLE)
#undef __FUNCT__
#define __FUNCT__ "PetscGetProc"
PetscErrorCode PetscGetProc(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank)
{
  PetscInt       nGlobalNd = proc_gnode[size];
  PetscMPIInt    fproc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMPIIntCast((PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5)),&fproc);CHKERRQ(ierr);
  if (fproc > size) fproc = size;
  while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) {
    if (row < proc_gnode[fproc]) fproc--;
    else fproc++;
  }
  *rank = fproc;
  PetscFunctionReturn(0);
}
#endif

/* -------------------------------------------------------------------------*/
/* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local"
PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allrows,PetscBool *allcolumns,Mat *submats)
{
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  Mat            A  = c->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w3,*w4,start;
  PetscErrorCode ierr;
  PetscMPIInt    size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc;
  PetscInt       **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row;
  PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
  PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
  PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
  PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
  PetscInt       bs =C->rmap->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB;
  PetscInt       cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark;
  PetscInt       *bmap = c->garray,ctmp,rstart=c->rstartbs;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3,*s_waits3;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2,*r_status3;
  MPI_Comm       comm;
  PetscBool      flag;
  PetscMPIInt    *onodes1,*olengths1;
  PetscBool      ijonly=c->ijonly; /* private flag indicates only matrix data structures are requested */

  /* variables below are used for the matrix numerical values - case of !ijonly */
  MPI_Request *r_waits4,*s_waits4;
  MPI_Status  *r_status4,*s_status4;
  MatScalar   **rbuf4,**sbuf_aa,*vals,*mat_a = NULL,*sbuf_aa_i,*vworkA = NULL,*vworkB = NULL;
  MatScalar   *a_a=a->a,*b_a=b->a;

#if defined(PETSC_USE_CTABLE)
  PetscInt   tt;
  PetscTable *rmap,*cmap,rmap_i,cmap_i=NULL;
#else
  PetscInt **cmap,*cmap_i=NULL,*rtable,*rmap_i,**rmap, Mbs = c->Mbs;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;

  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr);

#if defined(PETSC_USE_CTABLE)
  ierr = PetscMalloc4(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol);CHKERRQ(ierr);
#else
  ierr = PetscMalloc5(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol,Mbs+1,&rtable);CHKERRQ(ierr);
  /* Create hash table for the mapping :row -> proc*/
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1]/bs;
    for (; j<jmax; j++) rtable[j] = i;
  }
#endif

  for (i=0; i<ismax; i++) {
    if (allrows[i]) {
      irow[i] = NULL;
      nrow[i] = C->rmap->N/bs;
    } else {
      ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
      ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    }

    if (allcolumns[i]) {
      icol[i] = NULL;
      ncol[i] = C->cmap->N/bs;
    } else {
      ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
      ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
    }
  }

  /* evaluate communication - mesg to who,length of mesg,and buffer space
     required.
Based on this, buffers are allocated, and data copied into them*/ 704 ierr = PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); 705 ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 706 ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 707 ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); 708 for (i=0; i<ismax; i++) { 709 ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 710 jmax = nrow[i]; 711 irow_i = irow[i]; 712 for (j=0; j<jmax; j++) { 713 if (allrows[i]) row = j; 714 else row = irow_i[j]; 715 716 #if defined(PETSC_USE_CTABLE) 717 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 718 #else 719 proc = rtable[row]; 720 #endif 721 w4[proc]++; 722 } 723 for (j=0; j<size; j++) { 724 if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 725 } 726 } 727 728 nrqs = 0; /* no of outgoing messages */ 729 msz = 0; /* total mesg length for all proc */ 730 w1[rank] = 0; /* no mesg sent to intself */ 731 w3[rank] = 0; 732 for (i=0; i<size; i++) { 733 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 734 } 735 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ 736 for (i=0,j=0; i<size; i++) { 737 if (w1[i]) { pa[j] = i; j++; } 738 } 739 740 /* Each message would have a header = 1 + 2*(no of IS) + data */ 741 for (i=0; i<nrqs; i++) { 742 j = pa[i]; 743 w1[j] += w2[j] + 2* w3[j]; 744 msz += w1[j]; 745 } 746 747 /* Determine the number of messages to expect, their lengths, from from-ids */ 748 ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 749 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 750 751 /* Now post the Irecvs corresponding to these messages */ 752 ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); 753 754 ierr = PetscFree(onodes1);CHKERRQ(ierr); 755 ierr = PetscFree(olengths1);CHKERRQ(ierr); 756 757 /* Allocate Memory for outgoing messages */ 758 ierr = PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);CHKERRQ(ierr); 759 ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); 760 ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 761 { 762 PetscInt *iptr = tmp,ict = 0; 763 for (i=0; i<nrqs; i++) { 764 j = pa[i]; 765 iptr += ict; 766 sbuf1[j] = iptr; 767 ict = w1[j]; 768 } 769 } 770 771 /* Form the outgoing messages */ 772 /* Initialise the header space */ 773 for (i=0; i<nrqs; i++) { 774 j = pa[i]; 775 sbuf1[j][0] = 0; 776 ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 777 ptr[j] = sbuf1[j] + 2*w3[j] + 1; 778 } 779 780 /* Parse the isrow and copy data into outbuf */ 781 for (i=0; i<ismax; i++) { 782 ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 783 irow_i = irow[i]; 784 jmax = nrow[i]; 785 for (j=0; j<jmax; j++) { /* parse the indices of each IS */ 786 if (allrows[i]) row = j; 787 else row = irow_i[j]; 788 789 #if defined(PETSC_USE_CTABLE) 790 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 791 #else 792 proc = rtable[row]; 793 #endif 794 if (proc != rank) { /* copy to the outgoing buf*/ 795 ctr[proc]++; 796 *ptr[proc] = row; 797 ptr[proc]++; 798 } 799 } 800 /* Update the headers for the current IS */ 801 for (j=0; j<size; j++) { /* Can Optimise this loop too */ 802 if ((ctr_j = ctr[j])) { 803 sbuf1_j = sbuf1[j]; 804 k = ++sbuf1_j[0]; 805 sbuf1_j[2*k] = ctr_j; 806 sbuf1_j[2*k-1] = i; 807 } 808 } 809 } 810 811 /* Now post the sends */ 812 ierr = 
PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 813 for (i=0; i<nrqs; ++i) { 814 j = pa[i]; 815 ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); 816 } 817 818 /* Post Recieves to capture the buffer size */ 819 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); 820 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);CHKERRQ(ierr); 821 rbuf2[0] = tmp + msz; 822 for (i=1; i<nrqs; ++i) { 823 j = pa[i]; 824 rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 825 } 826 for (i=0; i<nrqs; ++i) { 827 j = pa[i]; 828 ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); 829 } 830 831 /* Send to other procs the buf size they should allocate */ 832 833 /* Receive messages*/ 834 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 835 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); 836 ierr = PetscMalloc3(nrqr+1,&sbuf2,nrqr,&req_size,nrqr,&req_source);CHKERRQ(ierr); 837 { 838 Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data; 839 PetscInt *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; 840 841 for (i=0; i<nrqr; ++i) { 842 ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); 843 844 req_size[idex] = 0; 845 rbuf1_i = rbuf1[idex]; 846 start = 2*rbuf1_i[0] + 1; 847 ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); 848 ierr = PetscMalloc(end*sizeof(PetscInt),&sbuf2[idex]);CHKERRQ(ierr); 849 sbuf2_i = sbuf2[idex]; 850 for (j=start; j<end; j++) { 851 id = rbuf1_i[j] - rstart; 852 ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id]; 853 sbuf2_i[j] = ncols; 854 req_size[idex] += ncols; 855 } 856 req_source[idex] = r_status1[i].MPI_SOURCE; 857 /* form the header */ 858 sbuf2_i[0] = req_size[idex]; 859 for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j]; 860 ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);CHKERRQ(ierr); 861 } 862 } 863 ierr = PetscFree(r_status1);CHKERRQ(ierr); 864 ierr = PetscFree(r_waits1);CHKERRQ(ierr); 865 866 /* recv buffer sizes */ 867 /* Receive messages*/ 868 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);CHKERRQ(ierr); 869 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr); 870 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); 871 if (!ijonly) { 872 ierr = PetscMalloc((nrqs+1)*sizeof(MatScalar*),&rbuf4);CHKERRQ(ierr); 873 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr); 874 } 875 876 for (i=0; i<nrqs; ++i) { 877 ierr = MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);CHKERRQ(ierr); 878 ierr = PetscMalloc(rbuf2[idex][0]*sizeof(PetscInt),&rbuf3[idex]);CHKERRQ(ierr); 879 ierr = MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);CHKERRQ(ierr); 880 if (!ijonly) { 881 ierr = PetscMalloc(rbuf2[idex][0]*bs2*sizeof(MatScalar),&rbuf4[idex]);CHKERRQ(ierr); 882 ierr = MPI_Irecv(rbuf4[idex],rbuf2[idex][0]*bs2,MPIU_MATSCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);CHKERRQ(ierr); 883 } 884 } 885 ierr = PetscFree(r_status2);CHKERRQ(ierr); 886 ierr = PetscFree(r_waits2);CHKERRQ(ierr); 887 888 /* Wait on sends1 and sends2 */ 889 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); 890 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); 891 892 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} 893 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} 894 ierr = PetscFree(s_status1);CHKERRQ(ierr); 895 ierr = 
PetscFree(s_status2);CHKERRQ(ierr); 896 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 897 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 898 899 /* Now allocate buffers for a->j, and send them off */ 900 ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr); 901 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 902 ierr = PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);CHKERRQ(ierr); 903 for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 904 905 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);CHKERRQ(ierr); 906 { 907 for (i=0; i<nrqr; i++) { 908 rbuf1_i = rbuf1[i]; 909 sbuf_aj_i = sbuf_aj[i]; 910 ct1 = 2*rbuf1_i[0] + 1; 911 ct2 = 0; 912 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 913 kmax = rbuf1[i][2*j]; 914 for (k=0; k<kmax; k++,ct1++) { 915 row = rbuf1_i[ct1] - rstart; 916 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 917 ncols = nzA + nzB; 918 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 919 920 /* load the column indices for this row into cols*/ 921 cols = sbuf_aj_i + ct2; 922 for (l=0; l<nzB; l++) { 923 if ((ctmp = bmap[cworkB[l]]) < cstart) cols[l] = ctmp; 924 else break; 925 } 926 imark = l; 927 for (l=0; l<nzA; l++) cols[imark+l] = cstart + cworkA[l]; 928 for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]]; 929 ct2 += ncols; 930 } 931 } 932 ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr); 933 } 934 } 935 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);CHKERRQ(ierr); 936 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);CHKERRQ(ierr); 937 938 /* Allocate buffers for a->a, and send them off */ 939 if (!ijonly) { 940 ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar*),&sbuf_aa);CHKERRQ(ierr); 941 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 942 ierr = PetscMalloc((j+1)*bs2*sizeof(MatScalar),&sbuf_aa[0]);CHKERRQ(ierr); 943 for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2; 944 945 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);CHKERRQ(ierr); 946 { 947 for (i=0; i<nrqr; i++) { 948 rbuf1_i = rbuf1[i]; 949 sbuf_aa_i = sbuf_aa[i]; 950 ct1 = 2*rbuf1_i[0]+1; 951 ct2 = 0; 952 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 953 kmax = rbuf1_i[2*j]; 954 for (k=0; k<kmax; k++,ct1++) { 955 row = rbuf1_i[ct1] - rstart; 956 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 957 ncols = nzA + nzB; 958 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 959 vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2; 960 961 /* load the column values for this row into vals*/ 962 vals = sbuf_aa_i+ct2*bs2; 963 for (l=0; l<nzB; l++) { 964 if ((bmap[cworkB[l]]) < cstart) { 965 ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 966 } else break; 967 } 968 imark = l; 969 for (l=0; l<nzA; l++) { 970 ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 971 } 972 for (l=imark; l<nzB; l++) { 973 ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 974 } 975 ct2 += ncols; 976 } 977 } 978 ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_MATSCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); 979 } 980 } 981 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); 982 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); 983 } 984 ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); 985 ierr = PetscFree(rbuf1);CHKERRQ(ierr); 986 987 /* Form the matrix */ 988 /* create col map: global col of C -> local col of submatrices */ 989 { 990 const PetscInt 
*icol_i; 991 #if defined(PETSC_USE_CTABLE) 992 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&cmap);CHKERRQ(ierr); 993 for (i=0; i<ismax; i++) { 994 if (!allcolumns[i]) { 995 ierr = PetscTableCreate(ncol[i]+1,c->Nbs+1,&cmap[i]);CHKERRQ(ierr); 996 jmax = ncol[i]; 997 icol_i = icol[i]; 998 cmap_i = cmap[i]; 999 for (j=0; j<jmax; j++) { 1000 ierr = PetscTableAdd(cmap_i,icol_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1001 } 1002 } else { 1003 cmap[i] = NULL; 1004 } 1005 } 1006 #else 1007 ierr = PetscMalloc(ismax*sizeof(PetscInt*),&cmap);CHKERRQ(ierr); 1008 for (i=0; i<ismax; i++) { 1009 if (!allcolumns[i]) { 1010 ierr = PetscMalloc(c->Nbs*sizeof(PetscInt),&cmap[i]);CHKERRQ(ierr); 1011 ierr = PetscMemzero(cmap[i],c->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 1012 jmax = ncol[i]; 1013 icol_i = icol[i]; 1014 cmap_i = cmap[i]; 1015 for (j=0; j<jmax; j++) { 1016 cmap_i[icol_i[j]] = j+1; 1017 } 1018 } else { /* allcolumns[i] */ 1019 cmap[i] = NULL; 1020 } 1021 } 1022 #endif 1023 } 1024 1025 /* Create lens which is required for MatCreate... */ 1026 for (i=0,j=0; i<ismax; i++) j += nrow[i]; 1027 ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ j*sizeof(PetscInt),&lens);CHKERRQ(ierr); 1028 lens[0] = (PetscInt*)(lens + ismax); 1029 ierr = PetscMemzero(lens[0],j*sizeof(PetscInt));CHKERRQ(ierr); 1030 for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1]; 1031 1032 /* Update lens from local data */ 1033 for (i=0; i<ismax; i++) { 1034 jmax = nrow[i]; 1035 if (!allcolumns[i]) cmap_i = cmap[i]; 1036 irow_i = irow[i]; 1037 lens_i = lens[i]; 1038 for (j=0; j<jmax; j++) { 1039 if (allrows[i]) row = j; 1040 else row = irow_i[j]; 1041 1042 #if defined(PETSC_USE_CTABLE) 1043 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1044 #else 1045 proc = rtable[row]; 1046 #endif 1047 if (proc == rank) { 1048 /* Get indices from matA and then from matB */ 1049 row = row - rstart; 1050 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 1051 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 1052 if (!allcolumns[i]) { 1053 #if defined(PETSC_USE_CTABLE) 1054 for (k=0; k<nzA; k++) { 1055 ierr = PetscTableFind(cmap_i,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr); 1056 if (tt) lens_i[j]++; 1057 } 1058 for (k=0; k<nzB; k++) { 1059 ierr = PetscTableFind(cmap_i,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr); 1060 if (tt) lens_i[j]++; 1061 } 1062 1063 #else 1064 for (k=0; k<nzA; k++) { 1065 if (cmap_i[cstart + cworkA[k]]) lens_i[j]++; 1066 } 1067 for (k=0; k<nzB; k++) { 1068 if (cmap_i[bmap[cworkB[k]]]) lens_i[j]++; 1069 } 1070 #endif 1071 } else { /* allcolumns */ 1072 lens_i[j] = nzA + nzB; 1073 } 1074 } 1075 } 1076 } 1077 #if defined(PETSC_USE_CTABLE) 1078 /* Create row map*/ 1079 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&rmap);CHKERRQ(ierr); 1080 for (i=0; i<ismax; i++) { 1081 ierr = PetscTableCreate(nrow[i]+1,c->Mbs+1,&rmap[i]);CHKERRQ(ierr); 1082 } 1083 #else 1084 /* Create row map*/ 1085 ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ ismax*Mbs*sizeof(PetscInt),&rmap);CHKERRQ(ierr); 1086 rmap[0] = (PetscInt*)(rmap + ismax); 1087 ierr = PetscMemzero(rmap[0],ismax*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + Mbs; 1089 #endif 1090 for (i=0; i<ismax; i++) { 1091 irow_i = irow[i]; 1092 jmax = nrow[i]; 1093 #if defined(PETSC_USE_CTABLE) 1094 rmap_i = rmap[i]; 1095 for (j=0; j<jmax; j++) { 1096 if (allrows[i]) { 1097 ierr = PetscTableAdd(rmap_i,j+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1098 } else { 1099 ierr = PetscTableAdd(rmap_i,irow_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1100 } 1101 } 
1102 #else 1103 rmap_i = rmap[i]; 1104 for (j=0; j<jmax; j++) { 1105 if (allrows[i]) rmap_i[j] = j; 1106 else rmap_i[irow_i[j]] = j; 1107 } 1108 #endif 1109 } 1110 1111 /* Update lens from offproc data */ 1112 { 1113 PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i; 1114 PetscMPIInt ii; 1115 1116 for (tmp2=0; tmp2<nrqs; tmp2++) { 1117 ierr = MPI_Waitany(nrqs,r_waits3,&ii,r_status3+tmp2);CHKERRQ(ierr); 1118 idex = pa[ii]; 1119 sbuf1_i = sbuf1[idex]; 1120 jmax = sbuf1_i[0]; 1121 ct1 = 2*jmax+1; 1122 ct2 = 0; 1123 rbuf2_i = rbuf2[ii]; 1124 rbuf3_i = rbuf3[ii]; 1125 for (j=1; j<=jmax; j++) { 1126 is_no = sbuf1_i[2*j-1]; 1127 max1 = sbuf1_i[2*j]; 1128 lens_i = lens[is_no]; 1129 if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1130 rmap_i = rmap[is_no]; 1131 for (k=0; k<max1; k++,ct1++) { 1132 #if defined(PETSC_USE_CTABLE) 1133 ierr = PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr); 1134 row--; 1135 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1136 #else 1137 row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */ 1138 #endif 1139 max2 = rbuf2_i[ct1]; 1140 for (l=0; l<max2; l++,ct2++) { 1141 if (!allcolumns[is_no]) { 1142 #if defined(PETSC_USE_CTABLE) 1143 ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tt);CHKERRQ(ierr); 1144 if (tt) lens_i[row]++; 1145 #else 1146 if (cmap_i[rbuf3_i[ct2]]) lens_i[row]++; 1147 #endif 1148 } else { /* allcolumns */ 1149 lens_i[row]++; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 } 1156 ierr = PetscFree(r_status3);CHKERRQ(ierr); 1157 ierr = PetscFree(r_waits3);CHKERRQ(ierr); 1158 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);} 1159 ierr = PetscFree(s_status3);CHKERRQ(ierr); 1160 ierr = PetscFree(s_waits3);CHKERRQ(ierr); 1161 1162 /* Create the submatrices */ 1163 if (scall == MAT_REUSE_MATRIX) { 1164 if (ijonly) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_REUSE_MATRIX and ijonly is not supported yet"); 1165 /* 1166 Assumes new rows are same length as the old rows, hence bug! 1167 */ 1168 for (i=0; i<ismax; i++) { 1169 mat = (Mat_SeqBAIJ*)(submats[i]->data); 1170 if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 1171 ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 1172 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. 
wrong no of nonzeros"); 1173 /* Initial matrix as if empty */ 1174 ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr); 1175 1176 submats[i]->factortype = C->factortype; 1177 } 1178 } else { 1179 PetscInt bs_tmp; 1180 if (ijonly) bs_tmp = 1; 1181 else bs_tmp = bs; 1182 for (i=0; i<ismax; i++) { 1183 ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); 1184 ierr = MatSetSizes(submats[i],nrow[i]*bs_tmp,ncol[i]*bs_tmp,nrow[i]*bs_tmp,ncol[i]*bs_tmp);CHKERRQ(ierr); 1185 ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); 1186 ierr = MatSeqBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); 1187 ierr = MatSeqSBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by SBAIJ routines */ 1188 } 1189 } 1190 1191 /* Assemble the matrices */ 1192 /* First assemble the local rows */ 1193 { 1194 PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i; 1195 MatScalar *imat_a = NULL; 1196 1197 for (i=0; i<ismax; i++) { 1198 mat = (Mat_SeqBAIJ*)submats[i]->data; 1199 imat_ilen = mat->ilen; 1200 imat_j = mat->j; 1201 imat_i = mat->i; 1202 if (!ijonly) imat_a = mat->a; 1203 if (!allcolumns[i]) cmap_i = cmap[i]; 1204 rmap_i = rmap[i]; 1205 irow_i = irow[i]; 1206 jmax = nrow[i]; 1207 for (j=0; j<jmax; j++) { 1208 if (allrows[i]) row = j; 1209 else row = irow_i[j]; 1210 1211 #if defined(PETSC_USE_CTABLE) 1212 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1213 #else 1214 proc = rtable[row]; 1215 #endif 1216 if (proc == rank) { 1217 row = row - rstart; 1218 nzA = a_i[row+1] - a_i[row]; 1219 nzB = b_i[row+1] - b_i[row]; 1220 cworkA = a_j + a_i[row]; 1221 cworkB = b_j + b_i[row]; 1222 if (!ijonly) { 1223 vworkA = a_a + a_i[row]*bs2; 1224 vworkB = b_a + b_i[row]*bs2; 1225 } 1226 #if defined(PETSC_USE_CTABLE) 1227 ierr = PetscTableFind(rmap_i,row+rstart+1,&row);CHKERRQ(ierr); 1228 row--; 1229 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1230 #else 1231 row = rmap_i[row + rstart]; 1232 #endif 1233 mat_i = imat_i[row]; 1234 if (!ijonly) mat_a = imat_a + mat_i*bs2; 1235 mat_j = imat_j + mat_i; 1236 ilen_row = imat_ilen[row]; 1237 1238 /* load the column indices for this row into cols*/ 1239 if (!allcolumns[i]) { 1240 for (l=0; l<nzB; l++) { 1241 if ((ctmp = bmap[cworkB[l]]) < cstart) { 1242 #if defined(PETSC_USE_CTABLE) 1243 ierr = PetscTableFind(cmap_i,ctmp+1,&tcol);CHKERRQ(ierr); 1244 if (tcol) { 1245 #else 1246 if ((tcol = cmap_i[ctmp])) { 1247 #endif 1248 *mat_j++ = tcol - 1; 1249 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1250 mat_a += bs2; 1251 ilen_row++; 1252 } 1253 } else break; 1254 } 1255 imark = l; 1256 for (l=0; l<nzA; l++) { 1257 #if defined(PETSC_USE_CTABLE) 1258 ierr = PetscTableFind(cmap_i,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr); 1259 if (tcol) { 1260 #else 1261 if ((tcol = cmap_i[cstart + cworkA[l]])) { 1262 #endif 1263 *mat_j++ = tcol - 1; 1264 if (!ijonly) { 1265 ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1266 mat_a += bs2; 1267 } 1268 ilen_row++; 1269 } 1270 } 1271 for (l=imark; l<nzB; l++) { 1272 #if defined(PETSC_USE_CTABLE) 1273 ierr = PetscTableFind(cmap_i,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr); 1274 if (tcol) { 1275 #else 1276 if ((tcol = cmap_i[bmap[cworkB[l]]])) { 1277 #endif 1278 *mat_j++ = tcol - 1; 1279 if (!ijonly) { 1280 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1281 mat_a += bs2; 1282 } 1283 ilen_row++; 1284 } 1285 } 1286 } else { /* allcolumns */ 1287 for 
(l=0; l<nzB; l++) { 1288 if ((ctmp = bmap[cworkB[l]]) < cstart) { 1289 *mat_j++ = ctmp; 1290 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1291 mat_a += bs2; 1292 ilen_row++; 1293 } else break; 1294 } 1295 imark = l; 1296 for (l=0; l<nzA; l++) { 1297 *mat_j++ = cstart+cworkA[l]; 1298 if (!ijonly) { 1299 ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1300 mat_a += bs2; 1301 } 1302 ilen_row++; 1303 } 1304 for (l=imark; l<nzB; l++) { 1305 *mat_j++ = bmap[cworkB[l]]; 1306 if (!ijonly) { 1307 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1308 mat_a += bs2; 1309 } 1310 ilen_row++; 1311 } 1312 } 1313 imat_ilen[row] = ilen_row; 1314 } 1315 } 1316 } 1317 } 1318 1319 /* Now assemble the off proc rows*/ 1320 { 1321 PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen; 1322 PetscInt *imat_j,*imat_i; 1323 MatScalar *imat_a = NULL,*rbuf4_i = NULL; 1324 PetscMPIInt ii; 1325 1326 for (tmp2=0; tmp2<nrqs; tmp2++) { 1327 if (ijonly) ii = tmp2; 1328 else { 1329 ierr = MPI_Waitany(nrqs,r_waits4,&ii,r_status4+tmp2);CHKERRQ(ierr); 1330 } 1331 idex = pa[ii]; 1332 sbuf1_i = sbuf1[idex]; 1333 jmax = sbuf1_i[0]; 1334 ct1 = 2*jmax + 1; 1335 ct2 = 0; 1336 rbuf2_i = rbuf2[ii]; 1337 rbuf3_i = rbuf3[ii]; 1338 if (!ijonly) rbuf4_i = rbuf4[ii]; 1339 for (j=1; j<=jmax; j++) { 1340 is_no = sbuf1_i[2*j-1]; 1341 if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1342 rmap_i = rmap[is_no]; 1343 mat = (Mat_SeqBAIJ*)submats[is_no]->data; 1344 imat_ilen = mat->ilen; 1345 imat_j = mat->j; 1346 imat_i = mat->i; 1347 if (!ijonly) imat_a = mat->a; 1348 max1 = sbuf1_i[2*j]; 1349 for (k=0; k<max1; k++,ct1++) { 1350 row = sbuf1_i[ct1]; 1351 #if defined(PETSC_USE_CTABLE) 1352 ierr = PetscTableFind(rmap_i,row+1,&row);CHKERRQ(ierr); 1353 row--; 1354 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1355 #else 1356 row = rmap_i[row]; 1357 #endif 1358 ilen = imat_ilen[row]; 1359 mat_i = imat_i[row]; 1360 if (!ijonly) mat_a = imat_a + mat_i*bs2; 1361 mat_j = imat_j + mat_i; 1362 max2 = rbuf2_i[ct1]; 1363 1364 if (!allcolumns[is_no]) { 1365 for (l=0; l<max2; l++,ct2++) { 1366 #if defined(PETSC_USE_CTABLE) 1367 ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 1368 if (tcol) { 1369 #else 1370 if ((tcol = cmap_i[rbuf3_i[ct2]])) { 1371 #endif 1372 *mat_j++ = tcol - 1; 1373 if (!ijonly) { 1374 ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1375 mat_a += bs2; 1376 } 1377 ilen++; 1378 } 1379 } 1380 } else { /* allcolumns */ 1381 for (l=0; l<max2; l++,ct2++) { 1382 *mat_j++ = rbuf3_i[ct2]; 1383 if (!ijonly) { 1384 ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1385 mat_a += bs2; 1386 } 1387 ilen++; 1388 } 1389 } 1390 imat_ilen[row] = ilen; 1391 } 1392 } 1393 } 1394 } 1395 if (!ijonly) { 1396 ierr = PetscFree(r_status4);CHKERRQ(ierr); 1397 ierr = PetscFree(r_waits4);CHKERRQ(ierr); 1398 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} 1399 ierr = PetscFree(s_waits4);CHKERRQ(ierr); 1400 ierr = PetscFree(s_status4);CHKERRQ(ierr); 1401 } 1402 1403 /* Restore the indices */ 1404 for (i=0; i<ismax; i++) { 1405 if (!allrows[i]) { 1406 ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); 1407 } 1408 if (!allcolumns[i]) { 1409 ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); 1410 } 1411 } 1412 1413 /* Destroy allocated memory */ 1414 #if defined(PETSC_USE_CTABLE) 1415 ierr = PetscFree4(irow,icol,nrow,ncol);CHKERRQ(ierr); 1416 #else 1417 ierr 
= PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); 1418 #endif 1419 ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 1420 ierr = PetscFree(pa);CHKERRQ(ierr); 1421 1422 ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); 1423 ierr = PetscFree(sbuf1);CHKERRQ(ierr); 1424 ierr = PetscFree(rbuf2);CHKERRQ(ierr); 1425 for (i=0; i<nrqr; ++i) { 1426 ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); 1427 } 1428 for (i=0; i<nrqs; ++i) { 1429 ierr = PetscFree(rbuf3[i]);CHKERRQ(ierr); 1430 } 1431 ierr = PetscFree3(sbuf2,req_size,req_source);CHKERRQ(ierr); 1432 ierr = PetscFree(rbuf3);CHKERRQ(ierr); 1433 ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); 1434 ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); 1435 if (!ijonly) { 1436 for (i=0; i<nrqs; ++i) {ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr);} 1437 ierr = PetscFree(rbuf4);CHKERRQ(ierr); 1438 ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr); 1439 ierr = PetscFree(sbuf_aa);CHKERRQ(ierr); 1440 } 1441 1442 #if defined(PETSC_USE_CTABLE) 1443 for (i=0; i<ismax; i++) { 1444 ierr = PetscTableDestroy((PetscTable*)&rmap[i]);CHKERRQ(ierr); 1445 } 1446 #endif 1447 ierr = PetscFree(rmap);CHKERRQ(ierr); 1448 1449 for (i=0; i<ismax; i++) { 1450 if (!allcolumns[i]) { 1451 #if defined(PETSC_USE_CTABLE) 1452 ierr = PetscTableDestroy((PetscTable*)&cmap[i]);CHKERRQ(ierr); 1453 #else 1454 ierr = PetscFree(cmap[i]);CHKERRQ(ierr); 1455 #endif 1456 } 1457 } 1458 ierr = PetscFree(cmap);CHKERRQ(ierr); 1459 ierr = PetscFree(lens);CHKERRQ(ierr); 1460 1461 for (i=0; i<ismax; i++) { 1462 ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1464 } 1465 1466 c->ijonly = PETSC_FALSE; /* set back to the default */ 1467 PetscFunctionReturn(0); 1468 } 1469 1470
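/*
  Illustrative only: a minimal sketch of how the routines in this file are
  typically driven through the public interface (for example by the additive
  Schwarz preconditioner).  The function name is ours, error checking is
  reduced to CHKERRQ, and n (number of index sets) and ov (overlap) are
  placeholders supplied by the caller.
*/
#if 0
static PetscErrorCode BuildOverlappingSubmatrices_Sketch(Mat C,PetscInt n,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  Mat            *submat;

  PetscFunctionBegin;
  /* grow each index set by ov levels of overlap (collective on C) */
  ierr = MatIncreaseOverlap(C,n,is,ov);CHKERRQ(ierr);
  /* extract the sequential submatrices defined by the enlarged sets */
  ierr = MatGetSubMatrices(C,n,is,is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);

  /* ... use submat[0..n-1], e.g. to set up local solvers ... */

  ierr = MatDestroyMatrices(n,&submat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif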