1 2 /* 3 Routines to compute overlapping regions of a parallel MPI matrix 4 and to find submatrices that were shared across processors. 5 */ 6 #include <../src/mat/impls/baij/mpi/mpibaij.h> 7 #include <petscbt.h> 8 9 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**); 10 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*); 11 extern PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 12 extern PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 13 14 PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) 15 { 16 PetscErrorCode ierr; 17 PetscInt i,N=C->cmap->N, bs=C->rmap->bs; 18 IS *is_new; 19 20 PetscFunctionBegin; 21 ierr = PetscMalloc1(imax,&is_new);CHKERRQ(ierr); 22 /* Convert the indices into block format */ 23 ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,imax,is,is_new);CHKERRQ(ierr); 24 if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n"); 25 for (i=0; i<ov; ++i) { 26 ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr); 27 } 28 for (i=0; i<imax; i++) {ierr = ISDestroy(&is[i]);CHKERRQ(ierr);} 29 ierr = ISExpandIndicesGeneral(N,N,bs,imax,is_new,is);CHKERRQ(ierr); 30 for (i=0; i<imax; i++) {ierr = ISDestroy(&is_new[i]);CHKERRQ(ierr);} 31 ierr = PetscFree(is_new);CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 /* 36 Sample message format: 37 If a processor A wants processor B to process some elements corresponding 38 to index sets is[1], is[5] 39 mesg [0] = 2 (no of index sets in the mesg) 40 ----------- 41 mesg [1] = 1 => is[1] 42 mesg [2] = sizeof(is[1]); 43 ----------- 44 mesg [5] = 5 => is[5] 45 mesg [6] = sizeof(is[5]); 46 ----------- 47 mesg [7] 48 mesg [n] data(is[1]) 49 ----------- 50 mesg[n+1] 51 mesg[m] data(is[5]) 52 ----------- 53 54 Notes: 55 nrqs - no of requests sent (or to be sent out) 56 nrqr - no of requests recieved (which have to be or which have been processed 57 */ 58 PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[]) 59 { 60 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 61 const PetscInt **idx,*idx_i; 62 PetscInt **iidx; 63 PetscInt *n,*w3,*w4,**data,len; 64 PetscErrorCode ierr; 65 PetscMPIInt size,rank,tag1,tag2,*w2,*w1,nrqr; 66 PetscInt Mbs,i,j,k,**rbuf,row,proc=-1,nrqs,msz,**outdat,**ptr; 67 PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p; 68 PetscMPIInt *onodes1,*olengths1,*onodes2,*olengths2; 69 PetscBT *table; 70 MPI_Comm comm; 71 MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; 72 MPI_Status *s_status,*recv_status; 73 char *t_p; 74 75 PetscFunctionBegin; 76 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 77 size = c->size; 78 rank = c->rank; 79 Mbs = c->Mbs; 80 81 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 82 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 83 84 ierr = PetscMalloc2(imax+1,(PetscInt***)&idx,imax,&n);CHKERRQ(ierr); 85 86 for (i=0; i<imax; i++) { 87 ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr); 88 ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); 89 } 90 91 /* evaluate communication - mesg to who,length of mesg, and buffer space 92 required. Based on this, buffers are allocated, and data copied into them*/ 93 ierr = PetscCalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); 94 for (i=0; i<imax; i++) { 95 ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 96 idx_i = idx[i]; 97 len = n[i]; 98 for (j=0; j<len; j++) { 99 row = idx_i[j]; 100 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries"); 101 ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); 102 w4[proc]++; 103 } 104 for (j=0; j<size; j++) { 105 if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 106 } 107 } 108 109 nrqs = 0; /* no of outgoing messages */ 110 msz = 0; /* total mesg length (for all proc */ 111 w1[rank] = 0; /* no mesg sent to itself */ 112 w3[rank] = 0; 113 for (i=0; i<size; i++) { 114 if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 115 } 116 /* pa - is list of processors to communicate with */ 117 ierr = PetscMalloc1(nrqs+1,&pa);CHKERRQ(ierr); 118 for (i=0,j=0; i<size; i++) { 119 if (w1[i]) {pa[j] = i; j++;} 120 } 121 122 /* Each message would have a header = 1 + 2*(no of IS) + data */ 123 for (i=0; i<nrqs; i++) { 124 j = pa[i]; 125 w1[j] += w2[j] + 2*w3[j]; 126 msz += w1[j]; 127 } 128 129 /* Determine the number of messages to expect, their lengths, from from-ids */ 130 ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 131 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 132 133 /* Now post the Irecvs corresponding to these messages */ 134 ierr = PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);CHKERRQ(ierr); 135 136 /* Allocate Memory for outgoing messages */ 137 ierr = PetscMalloc4(size,&outdat,size,&ptr,msz,&tmp,size,&ctr);CHKERRQ(ierr); 138 ierr = PetscMemzero(outdat,size*sizeof(PetscInt*));CHKERRQ(ierr); 139 ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 140 { 141 PetscInt *iptr = tmp,ict = 0; 142 for (i=0; i<nrqs; i++) { 143 j = pa[i]; 144 iptr += ict; 145 outdat[j] = iptr; 146 ict = w1[j]; 147 } 148 } 149 150 /* Form the outgoing messages */ 151 /*plug in the headers*/ 152 for (i=0; i<nrqs; i++) { 153 j = pa[i]; 154 outdat[j][0] = 0; 155 ierr = PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 156 ptr[j] = outdat[j] + 2*w3[j] + 1; 157 } 158 159 /* Memory for doing local proc's work*/ 160 { 161 ierr = PetscCalloc5(imax,&table, imax,&data, imax,&isz, Mbs*imax,&d_p, (Mbs/PETSC_BITS_PER_BYTE+1)*imax,&t_p);CHKERRQ(ierr); 162 163 for (i=0; i<imax; i++) { 164 table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 165 data[i] = d_p + (Mbs)*i; 166 } 167 } 168 169 /* Parse the IS and update local tables and the outgoing buf with the data*/ 170 { 171 PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j; 172 PetscBT table_i; 173 174 for (i=0; i<imax; i++) { 175 ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 176 n_i = n[i]; 177 table_i = table[i]; 178 idx_i = idx[i]; 179 data_i = data[i]; 180 isz_i = isz[i]; 181 for (j=0; j<n_i; j++) { /* parse the indices of each IS */ 182 row = idx_i[j]; 183 ierr = PetscLayoutFindOwner(C->rmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); 184 if (proc != rank) { /* copy to the outgoing buffer */ 185 ctr[proc]++; 186 *ptr[proc] = row; 187 ptr[proc]++; 188 } else { /* Update the local table */ 189 if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; 190 } 191 } 192 /* Update the headers for the current IS */ 193 for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */ 194 if ((ctr_j = ctr[j])) { 195 outdat_j = outdat[j]; 196 k = ++outdat_j[0]; 197 outdat_j[2*k] = ctr_j; 198 outdat_j[2*k-1] = i; 199 } 200 } 201 isz[i] = isz_i; 202 } 203 } 204 205 /* Now post the sends */ 206 ierr = PetscMalloc1(nrqs+1,&s_waits1);CHKERRQ(ierr); 207 for (i=0; i<nrqs; ++i) { 208 j = pa[i]; 209 ierr = MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);CHKERRQ(ierr); 210 } 211 212 /* No longer need the original indices*/ 213 for (i=0; i<imax; ++i) { 214 ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr); 215 } 216 iidx = (PetscInt**)idx; /* required because PetscFreeN() cannot work on const */ 217 ierr = PetscFree2(iidx,n);CHKERRQ(ierr); 218 219 for (i=0; i<imax; ++i) { 220 ierr = ISDestroy(&is[i]);CHKERRQ(ierr); 221 } 222 223 /* Do Local work*/ 224 ierr = MatIncreaseOverlap_MPIBAIJ_Local(C,imax,table,isz,data);CHKERRQ(ierr); 225 226 /* Receive messages*/ 227 ierr = PetscMalloc1(nrqr+1,&recv_status);CHKERRQ(ierr); 228 if (nrqr) {ierr = MPI_Waitall(nrqr,r_waits1,recv_status);CHKERRQ(ierr);} 229 230 ierr = PetscMalloc1(nrqs+1,&s_status);CHKERRQ(ierr); 231 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);} 232 233 /* Phase 1 sends are complete - deallocate buffers */ 234 ierr = PetscFree4(outdat,ptr,tmp,ctr);CHKERRQ(ierr); 235 ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 236 237 ierr = PetscMalloc1(nrqr+1,&xdata);CHKERRQ(ierr); 238 ierr = PetscMalloc1(nrqr+1,&isz1);CHKERRQ(ierr); 239 ierr = MatIncreaseOverlap_MPIBAIJ_Receive(C,nrqr,rbuf,xdata,isz1);CHKERRQ(ierr); 240 ierr = PetscFree(rbuf[0]);CHKERRQ(ierr); 241 ierr = PetscFree(rbuf);CHKERRQ(ierr); 242 243 /* Send the data back*/ 244 /* Do a global reduction to know the buffer space req for incoming messages*/ 245 { 246 PetscMPIInt *rw1; 247 248 ierr = PetscCalloc1(size,&rw1);CHKERRQ(ierr); 249 250 for (i=0; i<nrqr; ++i) { 251 proc = recv_status[i].MPI_SOURCE; 252 if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch"); 253 rw1[proc] = isz1[i]; 254 } 255 256 ierr = PetscFree(onodes1);CHKERRQ(ierr); 257 ierr = PetscFree(olengths1);CHKERRQ(ierr); 258 259 /* Determine the number of messages to expect, their lengths, from from-ids */ 260 ierr = PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);CHKERRQ(ierr); 261 ierr = PetscFree(rw1);CHKERRQ(ierr); 262 } 263 /* Now post the Irecvs corresponding to these messages */ 264 ierr = PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);CHKERRQ(ierr); 265 266 /* Now post the sends */ 267 ierr = PetscMalloc1(nrqr+1,&s_waits2);CHKERRQ(ierr); 268 for (i=0; i<nrqr; ++i) { 269 j = recv_status[i].MPI_SOURCE; 270 ierr = MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);CHKERRQ(ierr); 271 } 272 273 /* receive work done on other processors*/ 274 { 275 PetscMPIInt idex; 276 PetscInt is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax; 277 PetscBT table_i; 278 MPI_Status *status2; 279 280 ierr = PetscMalloc1(PetscMax(nrqr,nrqs)+1,&status2);CHKERRQ(ierr); 281 for (i=0; i<nrqs; ++i) { 282 ierr = MPI_Waitany(nrqs,r_waits2,&idex,status2+i);CHKERRQ(ierr); 283 /* Process the message*/ 284 rbuf2_i = rbuf2[idex]; 285 ct1 = 2*rbuf2_i[0]+1; 286 jmax = rbuf2[idex][0]; 287 for (j=1; j<=jmax; j++) { 288 max = rbuf2_i[2*j]; 289 is_no = rbuf2_i[2*j-1]; 290 isz_i = isz[is_no]; 291 data_i = data[is_no]; 292 table_i = table[is_no]; 293 for (k=0; k<max; k++,ct1++) { 294 row = rbuf2_i[ct1]; 295 if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; 296 } 297 isz[is_no] = isz_i; 298 } 299 } 300 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,status2);CHKERRQ(ierr);} 301 ierr = PetscFree(status2);CHKERRQ(ierr); 302 } 303 304 for (i=0; i<imax; ++i) { 305 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);CHKERRQ(ierr); 306 } 307 308 309 ierr = PetscFree(onodes2);CHKERRQ(ierr); 310 ierr = PetscFree(olengths2);CHKERRQ(ierr); 311 312 ierr = PetscFree(pa);CHKERRQ(ierr); 313 ierr = PetscFree(rbuf2[0]);CHKERRQ(ierr); 314 ierr = PetscFree(rbuf2);CHKERRQ(ierr); 315 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 316 ierr = PetscFree(r_waits1);CHKERRQ(ierr); 317 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 318 ierr = PetscFree(r_waits2);CHKERRQ(ierr); 319 ierr = PetscFree5(table,data,isz,d_p,t_p);CHKERRQ(ierr); 320 ierr = PetscFree(s_status);CHKERRQ(ierr); 321 ierr = PetscFree(recv_status);CHKERRQ(ierr); 322 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 323 ierr = PetscFree(xdata);CHKERRQ(ierr); 324 ierr = PetscFree(isz1);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 /* 329 MatIncreaseOverlap_MPIBAIJ_Local - Called by MatincreaseOverlap, to do 330 the work on the local processor. 331 332 Inputs: 333 C - MAT_MPIBAIJ; 334 imax - total no of index sets processed at a time; 335 table - an array of char - size = Mbs bits. 336 337 Output: 338 isz - array containing the count of the solution elements corresponding 339 to each index set; 340 data - pointer to the solutions 341 */ 342 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data) 343 { 344 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 345 Mat A = c->A,B = c->B; 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; 347 PetscInt start,end,val,max,rstart,cstart,*ai,*aj; 348 PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; 349 PetscBT table_i; 350 351 PetscFunctionBegin; 352 rstart = c->rstartbs; 353 cstart = c->cstartbs; 354 ai = a->i; 355 aj = a->j; 356 bi = b->i; 357 bj = b->j; 358 garray = c->garray; 359 360 361 for (i=0; i<imax; i++) { 362 data_i = data[i]; 363 table_i = table[i]; 364 isz_i = isz[i]; 365 for (j=0,max=isz[i]; j<max; j++) { 366 row = data_i[j] - rstart; 367 start = ai[row]; 368 end = ai[row+1]; 369 for (k=start; k<end; k++) { /* Amat */ 370 val = aj[k] + cstart; 371 if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val; 372 } 373 start = bi[row]; 374 end = bi[row+1]; 375 for (k=start; k<end; k++) { /* Bmat */ 376 val = garray[bj[k]]; 377 if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val; 378 } 379 } 380 isz[i] = isz_i; 381 } 382 PetscFunctionReturn(0); 383 } 384 /* 385 MatIncreaseOverlap_MPIBAIJ_Receive - Process the recieved messages, 386 and return the output 387 388 Input: 389 C - the matrix 390 nrqr - no of messages being processed. 391 rbuf - an array of pointers to the recieved requests 392 393 Output: 394 xdata - array of messages to be sent back 395 isz1 - size of each message 396 397 For better efficiency perhaps we should malloc separately each xdata[i], 398 then if a remalloc is required we need only copy the data for that one row 399 rather than all previous rows as it is now where a single large chunck of 400 memory is used. 401 402 */ 403 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1) 404 { 405 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 406 Mat A = c->A,B = c->B; 407 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; 408 PetscErrorCode ierr; 409 PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; 410 PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; 411 PetscInt val,max1,max2,Mbs,no_malloc =0,*tmp,new_estimate,ctr; 412 PetscInt *rbuf_i,kmax,rbuf_0; 413 PetscBT xtable; 414 415 PetscFunctionBegin; 416 Mbs = c->Mbs; 417 rstart = c->rstartbs; 418 cstart = c->cstartbs; 419 ai = a->i; 420 aj = a->j; 421 bi = b->i; 422 bj = b->j; 423 garray = c->garray; 424 425 426 for (i=0,ct=0,total_sz=0; i<nrqr; ++i) { 427 rbuf_i = rbuf[i]; 428 rbuf_0 = rbuf_i[0]; 429 ct += rbuf_0; 430 for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j]; 431 } 432 433 if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs; 434 else max1 = 1; 435 mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); 436 ierr = PetscMalloc1(mem_estimate,&xdata[0]);CHKERRQ(ierr); 437 ++no_malloc; 438 ierr = PetscBTCreate(Mbs,&xtable);CHKERRQ(ierr); 439 ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr); 440 441 ct3 = 0; 442 for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */ 443 rbuf_i = rbuf[i]; 444 rbuf_0 = rbuf_i[0]; 445 ct1 = 2*rbuf_0+1; 446 ct2 = ct1; 447 ct3 += ct1; 448 for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/ 449 ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr); 450 oct2 = ct2; 451 kmax = rbuf_i[2*j]; 452 for (k=0; k<kmax; k++,ct1++) { 453 row = rbuf_i[ct1]; 454 if (!PetscBTLookupSet(xtable,row)) { 455 if (!(ct3 < mem_estimate)) { 456 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 457 ierr = PetscMalloc1(new_estimate,&tmp);CHKERRQ(ierr); 458 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 459 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 460 xdata[0] = tmp; 461 mem_estimate = new_estimate; ++no_malloc; 462 for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 463 } 464 xdata[i][ct2++] = row; 465 ct3++; 466 } 467 } 468 for (k=oct2,max2=ct2; k<max2; k++) { 469 row = xdata[i][k] - rstart; 470 start = ai[row]; 471 end = ai[row+1]; 472 for (l=start; l<end; l++) { 473 val = aj[l] + cstart; 474 if (!PetscBTLookupSet(xtable,val)) { 475 if (!(ct3 < mem_estimate)) { 476 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 477 ierr = PetscMalloc1(new_estimate,&tmp);CHKERRQ(ierr); 478 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 479 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 480 xdata[0] = tmp; 481 mem_estimate = new_estimate; ++no_malloc; 482 for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 483 } 484 xdata[i][ct2++] = val; 485 ct3++; 486 } 487 } 488 start = bi[row]; 489 end = bi[row+1]; 490 for (l=start; l<end; l++) { 491 val = garray[bj[l]]; 492 if (!PetscBTLookupSet(xtable,val)) { 493 if (!(ct3 < mem_estimate)) { 494 new_estimate = (PetscInt)(1.5*mem_estimate)+1; 495 ierr = PetscMalloc1(new_estimate,&tmp);CHKERRQ(ierr); 496 ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr); 497 ierr = PetscFree(xdata[0]);CHKERRQ(ierr); 498 xdata[0] = tmp; 499 mem_estimate = new_estimate; ++no_malloc; 500 for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1]; 501 } 502 xdata[i][ct2++] = val; 503 ct3++; 504 } 505 } 506 } 507 /* Update the header*/ 508 xdata[i][2*j] = ct2 - oct2; /* Undo the vector isz1 and use only a var*/ 509 xdata[i][2*j-1] = rbuf_i[2*j-1]; 510 } 511 xdata[i][0] = rbuf_0; 512 xdata[i+1] = xdata[i] + ct2; 513 isz1[i] = ct2; /* size of each message */ 514 } 515 ierr = PetscBTDestroy(&xtable);CHKERRQ(ierr); 516 ierr = PetscInfo3(C,"Allocated %D bytes, required %D, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);CHKERRQ(ierr); 517 PetscFunctionReturn(0); 518 } 519 520 PetscErrorCode MatCreateSubMatrices_MPIBAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) 521 { 522 IS *isrow_block,*iscol_block; 523 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 524 PetscErrorCode ierr; 525 PetscInt nmax,nstages_local,nstages,i,pos,max_no,N=C->cmap->N,bs=C->rmap->bs; 526 Mat_SeqBAIJ *subc; 527 Mat_SubSppt *smat; 528 529 PetscFunctionBegin; 530 /* The compression and expansion should be avoided. Doesn't point 531 out errors, might change the indices, hence buggey */ 532 ierr = PetscMalloc2(ismax+1,&isrow_block,ismax+1,&iscol_block);CHKERRQ(ierr); 533 ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,ismax,isrow,isrow_block);CHKERRQ(ierr); 534 ierr = ISCompressIndicesGeneral(N,C->cmap->n,bs,ismax,iscol,iscol_block);CHKERRQ(ierr); 535 536 /* Determine the number of stages through which submatrices are done */ 537 if (!C->cmap->N) nmax=20*1000000/sizeof(PetscInt); 538 else nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt)); 539 if (!nmax) nmax = 1; 540 541 if (scall == MAT_INITIAL_MATRIX) { 542 nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0); /* local nstages */ 543 544 /* Make sure every processor loops through the nstages */ 545 ierr = MPIU_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); 546 547 /* Allocate memory to hold all the submatrices and dummy submatrices */ 548 ierr = PetscCalloc1(ismax+nstages,submat);CHKERRQ(ierr); 549 } else { /* MAT_REUSE_MATRIX */ 550 if (ismax) { 551 subc = (Mat_SeqBAIJ*)((*submat)[0]->data); 552 smat = subc->submatis1; 553 } else { /* (*submat)[0] is a dummy matrix */ 554 smat = (Mat_SubSppt*)(*submat)[0]->data; 555 } 556 if (!smat) { 557 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"MatCreateSubMatrices(...,MAT_REUSE_MATRIX,...) requires submat"); 558 } 559 nstages = smat->nstages; 560 } 561 562 for (i=0,pos=0; i<nstages; i++) { 563 if (pos+nmax <= ismax) max_no = nmax; 564 else if (pos == ismax) max_no = 0; 565 else max_no = ismax-pos; 566 567 ierr = MatCreateSubMatrices_MPIBAIJ_local(C,max_no,isrow_block+pos,iscol_block+pos,scall,*submat+pos);CHKERRQ(ierr); 568 if (!max_no && scall == MAT_INITIAL_MATRIX) { /* submat[pos] is a dummy matrix */ 569 smat = (Mat_SubSppt*)(*submat)[pos]->data; 570 smat->nstages = nstages; 571 } 572 pos += max_no; 573 } 574 575 if (scall == MAT_INITIAL_MATRIX && ismax) { 576 /* save nstages for reuse */ 577 subc = (Mat_SeqBAIJ*)((*submat)[0]->data); 578 smat = subc->submatis1; 579 smat->nstages = nstages; 580 } 581 582 for (i=0; i<ismax; i++) { 583 ierr = ISDestroy(&isrow_block[i]);CHKERRQ(ierr); 584 ierr = ISDestroy(&iscol_block[i]);CHKERRQ(ierr); 585 } 586 ierr = PetscFree2(isrow_block,iscol_block);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #if defined(PETSC_USE_CTABLE) 591 PetscErrorCode PetscGetProc(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank) 592 { 593 PetscInt nGlobalNd = proc_gnode[size]; 594 PetscMPIInt fproc; 595 PetscErrorCode ierr; 596 597 PetscFunctionBegin; 598 ierr = PetscMPIIntCast((PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5)),&fproc);CHKERRQ(ierr); 599 if (fproc > size) fproc = size; 600 while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) { 601 if (row < proc_gnode[fproc]) fproc--; 602 else fproc++; 603 } 604 *rank = fproc; 605 PetscFunctionReturn(0); 606 } 607 #endif 608 609 /* -------------------------------------------------------------------------*/ 610 /* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */ 611 PetscErrorCode MatCreateSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) 612 { 613 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 614 Mat A = c->A; 615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*subc; 616 const PetscInt **icol,**irow; 617 PetscInt **iicol,**iirow; 618 PetscInt *nrow,*ncol,start; 619 PetscErrorCode ierr; 620 PetscMPIInt rank,size,tag0,tag2,tag3,tag4,*w1,*w2,*w3,*w4,nrqr; 621 PetscInt **sbuf1,**sbuf2,*sbuf2_i,i,j,k,l,ct1,ct2,**rbuf1,row,proc=-1; 622 PetscInt nrqs=0,msz,**ptr=NULL,*req_size=NULL,*ctr=NULL,*pa,*tmp=NULL,tcol; 623 PetscInt **rbuf3=NULL,*req_source1=NULL,*req_source2,**sbuf_aj,**rbuf2=NULL,max1,max2; 624 PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; 625 #if defined(PETSC_USE_CTABLE) 626 PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i; 627 #else 628 PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i; 629 #endif 630 const PetscInt *irow_i,*icol_i; 631 PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; 632 MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; 633 MPI_Request *r_waits4,*s_waits3,*s_waits4; 634 MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; 635 MPI_Status *r_status3,*r_status4,*s_status4; 636 MPI_Comm comm; 637 PetscScalar **rbuf4,*rbuf4_i=NULL,**sbuf_aa,*vals,*mat_a=NULL,*imat_a,*sbuf_aa_i; 638 PetscMPIInt *onodes1,*olengths1,end; 639 PetscInt **row2proc,*row2proc_i,*imat_ilen,*imat_j,*imat_i; 640 Mat_SubSppt *smat_i; 641 PetscBool *issorted,colflag,iscsorted=PETSC_TRUE; 642 PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,ilen; 643 PetscInt bs=C->rmap->bs,bs2=c->bs2,rstart = c->rstartbs; 644 PetscBool ijonly=c->ijonly; /* private flag indicates only matrix data structures are requested */ 645 PetscInt nzA,nzB,*a_i=a->i,*b_i=b->i,*a_j = a->j,*b_j = b->j,ctmp,imark,*cworkA,*cworkB; 646 PetscScalar *vworkA=NULL,*vworkB=NULL,*a_a = a->a,*b_a = b->a; 647 PetscInt cstart = c->cstartbs,*bmap = c->garray; 648 PetscBool *allrows,*allcolumns; 649 650 PetscFunctionBegin; 651 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 652 size = c->size; 653 rank = c->rank; 654 655 ierr = PetscMalloc5(ismax,&row2proc,ismax,&cmap,ismax,&rmap,ismax+1,&allcolumns,ismax,&allrows);CHKERRQ(ierr); 656 ierr = PetscMalloc5(ismax,(PetscInt***)&irow,ismax,(PetscInt***)&icol,ismax,&nrow,ismax,&ncol,ismax,&issorted);CHKERRQ(ierr); 657 658 for (i=0; i<ismax; i++) { 659 ierr = ISSorted(iscol[i],&issorted[i]);CHKERRQ(ierr); 660 if (!issorted[i]) iscsorted = issorted[i]; /* columns are not sorted! */ 661 ierr = ISSorted(isrow[i],&issorted[i]);CHKERRQ(ierr); 662 663 /* Check for special case: allcolumns */ 664 ierr = ISIdentity(iscol[i],&colflag);CHKERRQ(ierr); 665 ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); 666 667 if (colflag && ncol[i] == c->Nbs) { 668 allcolumns[i] = PETSC_TRUE; 669 icol[i] = NULL; 670 } else { 671 allcolumns[i] = PETSC_FALSE; 672 ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); 673 } 674 675 /* Check for special case: allrows */ 676 ierr = ISIdentity(isrow[i],&colflag);CHKERRQ(ierr); 677 ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); 678 if (colflag && nrow[i] == c->Mbs) { 679 allrows[i] = PETSC_TRUE; 680 irow[i] = NULL; 681 } else { 682 allrows[i] = PETSC_FALSE; 683 ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); 684 } 685 } 686 687 if (scall == MAT_REUSE_MATRIX) { 688 /* Assumes new rows are same length as the old rows */ 689 for (i=0; i<ismax; i++) { 690 subc = (Mat_SeqBAIJ*)(submats[i]->data); 691 if (subc->mbs != nrow[i] || subc->nbs != ncol[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 692 693 /* Initial matrix as if empty */ 694 ierr = PetscMemzero(subc->ilen,subc->mbs*sizeof(PetscInt));CHKERRQ(ierr); 695 696 /* Initial matrix as if empty */ 697 submats[i]->factortype = C->factortype; 698 699 smat_i = subc->submatis1; 700 701 nrqs = smat_i->nrqs; 702 nrqr = smat_i->nrqr; 703 rbuf1 = smat_i->rbuf1; 704 rbuf2 = smat_i->rbuf2; 705 rbuf3 = smat_i->rbuf3; 706 req_source2 = smat_i->req_source2; 707 708 sbuf1 = smat_i->sbuf1; 709 sbuf2 = smat_i->sbuf2; 710 ptr = smat_i->ptr; 711 tmp = smat_i->tmp; 712 ctr = smat_i->ctr; 713 714 pa = smat_i->pa; 715 req_size = smat_i->req_size; 716 req_source1 = smat_i->req_source1; 717 718 allcolumns[i] = smat_i->allcolumns; 719 allrows[i] = smat_i->allrows; 720 row2proc[i] = smat_i->row2proc; 721 rmap[i] = smat_i->rmap; 722 cmap[i] = smat_i->cmap; 723 } 724 725 if (!ismax){ /* Get dummy submatrices and retrieve struct submatis1 */ 726 if (!submats[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"submats are null, cannot reuse"); 727 smat_i = (Mat_SubSppt*)submats[0]->data; 728 729 nrqs = smat_i->nrqs; 730 nrqr = smat_i->nrqr; 731 rbuf1 = smat_i->rbuf1; 732 rbuf2 = smat_i->rbuf2; 733 rbuf3 = smat_i->rbuf3; 734 req_source2 = smat_i->req_source2; 735 736 sbuf1 = smat_i->sbuf1; 737 sbuf2 = smat_i->sbuf2; 738 ptr = smat_i->ptr; 739 tmp = smat_i->tmp; 740 ctr = smat_i->ctr; 741 742 pa = smat_i->pa; 743 req_size = smat_i->req_size; 744 req_source1 = smat_i->req_source1; 745 746 allcolumns[0] = PETSC_FALSE; 747 } 748 } else { /* scall == MAT_INITIAL_MATRIX */ 749 /* Get some new tags to keep the communication clean */ 750 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 751 ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); 752 753 /* evaluate communication - mesg to who, length of mesg, and buffer space 754 required. Based on this, buffers are allocated, and data copied into them*/ 755 ierr = PetscCalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); /* mesg size, initialize work vectors */ 756 757 for (i=0; i<ismax; i++) { 758 jmax = nrow[i]; 759 irow_i = irow[i]; 760 761 ierr = PetscMalloc1(jmax,&row2proc_i);CHKERRQ(ierr); 762 row2proc[i] = row2proc_i; 763 764 if (issorted[i]) proc = 0; 765 for (j=0; j<jmax; j++) { 766 if (!issorted[i]) proc = 0; 767 if (allrows[i]) row = j; 768 else row = irow_i[j]; 769 770 while (row >= c->rangebs[proc+1]) proc++; 771 w4[proc]++; 772 row2proc_i[j] = proc; /* map row index to proc */ 773 } 774 for (j=0; j<size; j++) { 775 if (w4[j]) { w1[j] += w4[j]; w3[j]++; w4[j] = 0;} 776 } 777 } 778 779 nrqs = 0; /* no of outgoing messages */ 780 msz = 0; /* total mesg length (for all procs) */ 781 w1[rank] = 0; /* no mesg sent to self */ 782 w3[rank] = 0; 783 for (i=0; i<size; i++) { 784 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 785 } 786 ierr = PetscMalloc1(nrqs+1,&pa);CHKERRQ(ierr); /*(proc -array)*/ 787 for (i=0,j=0; i<size; i++) { 788 if (w1[i]) { pa[j] = i; j++; } 789 } 790 791 /* Each message would have a header = 1 + 2*(no of IS) + data */ 792 for (i=0; i<nrqs; i++) { 793 j = pa[i]; 794 w1[j] += w2[j] + 2* w3[j]; 795 msz += w1[j]; 796 } 797 ierr = PetscInfo2(0,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);CHKERRQ(ierr); 798 799 /* Determine the number of messages to expect, their lengths, from from-ids */ 800 ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 801 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 802 803 /* Now post the Irecvs corresponding to these messages */ 804 tag0 = ((PetscObject)C)->tag; 805 ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); 806 807 ierr = PetscFree(onodes1);CHKERRQ(ierr); 808 ierr = PetscFree(olengths1);CHKERRQ(ierr); 809 810 /* Allocate Memory for outgoing messages */ 811 ierr = PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);CHKERRQ(ierr); 812 ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); 813 ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 814 815 { 816 PetscInt *iptr = tmp; 817 k = 0; 818 for (i=0; i<nrqs; i++) { 819 j = pa[i]; 820 iptr += k; 821 sbuf1[j] = iptr; 822 k = w1[j]; 823 } 824 } 825 826 /* Form the outgoing messages. Initialize the header space */ 827 for (i=0; i<nrqs; i++) { 828 j = pa[i]; 829 sbuf1[j][0] = 0; 830 ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 831 ptr[j] = sbuf1[j] + 2*w3[j] + 1; 832 } 833 834 /* Parse the isrow and copy data into outbuf */ 835 for (i=0; i<ismax; i++) { 836 row2proc_i = row2proc[i]; 837 ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 838 irow_i = irow[i]; 839 jmax = nrow[i]; 840 for (j=0; j<jmax; j++) { /* parse the indices of each IS */ 841 proc = row2proc_i[j]; 842 if (allrows[i]) row = j; 843 else row = irow_i[j]; 844 845 if (proc != rank) { /* copy to the outgoing buf*/ 846 ctr[proc]++; 847 *ptr[proc] = row; 848 ptr[proc]++; 849 } 850 } 851 /* Update the headers for the current IS */ 852 for (j=0; j<size; j++) { /* Can Optimise this loop too */ 853 if ((ctr_j = ctr[j])) { 854 sbuf1_j = sbuf1[j]; 855 k = ++sbuf1_j[0]; 856 sbuf1_j[2*k] = ctr_j; 857 sbuf1_j[2*k-1] = i; 858 } 859 } 860 } 861 862 /* Now post the sends */ 863 ierr = PetscMalloc1(nrqs+1,&s_waits1);CHKERRQ(ierr); 864 for (i=0; i<nrqs; ++i) { 865 j = pa[i]; 866 ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); 867 } 868 869 /* Post Receives to capture the buffer size */ 870 ierr = PetscMalloc1(nrqs+1,&r_waits2);CHKERRQ(ierr); 871 ierr = PetscMalloc3(nrqs+1,&req_source2,nrqs+1,&rbuf2,nrqs+1,&rbuf3);CHKERRQ(ierr); 872 rbuf2[0] = tmp + msz; 873 for (i=1; i<nrqs; ++i) { 874 rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 875 } 876 for (i=0; i<nrqs; ++i) { 877 j = pa[i]; 878 ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag2,comm,r_waits2+i);CHKERRQ(ierr); 879 } 880 881 /* Send to other procs the buf size they should allocate */ 882 /* Receive messages*/ 883 ierr = PetscMalloc1(nrqr+1,&s_waits2);CHKERRQ(ierr); 884 ierr = PetscMalloc1(nrqr+1,&r_status1);CHKERRQ(ierr); 885 ierr = PetscMalloc3(nrqr,&sbuf2,nrqr,&req_size,nrqr,&req_source1);CHKERRQ(ierr); 886 887 ierr = MPI_Waitall(nrqr,r_waits1,r_status1);CHKERRQ(ierr); 888 for (i=0; i<nrqr; ++i) { 889 req_size[i] = 0; 890 rbuf1_i = rbuf1[i]; 891 start = 2*rbuf1_i[0] + 1; 892 ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); 893 ierr = PetscMalloc1(end+1,&sbuf2[i]);CHKERRQ(ierr); 894 sbuf2_i = sbuf2[i]; 895 for (j=start; j<end; j++) { 896 row = rbuf1_i[j] - rstart; 897 ncols = a_i[row+1] - a_i[row] + b_i[row+1] - b_i[row]; 898 sbuf2_i[j] = ncols; 899 req_size[i] += ncols; 900 } 901 req_source1[i] = r_status1[i].MPI_SOURCE; 902 /* form the header */ 903 sbuf2_i[0] = req_size[i]; 904 for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j]; 905 906 ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source1[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 907 } 908 909 ierr = PetscFree(r_status1);CHKERRQ(ierr); 910 ierr = PetscFree(r_waits1);CHKERRQ(ierr); 911 ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 912 913 /* Receive messages*/ 914 ierr = PetscMalloc1(nrqs+1,&r_waits3);CHKERRQ(ierr); 915 ierr = PetscMalloc1(nrqs+1,&r_status2);CHKERRQ(ierr); 916 917 ierr = MPI_Waitall(nrqs,r_waits2,r_status2);CHKERRQ(ierr); 918 for (i=0; i<nrqs; ++i) { 919 ierr = PetscMalloc1(rbuf2[i][0]+1,&rbuf3[i]);CHKERRQ(ierr); 920 req_source2[i] = r_status2[i].MPI_SOURCE; 921 ierr = MPI_Irecv(rbuf3[i],rbuf2[i][0],MPIU_INT,req_source2[i],tag3,comm,r_waits3+i);CHKERRQ(ierr); 922 } 923 ierr = PetscFree(r_status2);CHKERRQ(ierr); 924 ierr = PetscFree(r_waits2);CHKERRQ(ierr); 925 926 /* Wait on sends1 and sends2 */ 927 ierr = PetscMalloc1(nrqs+1,&s_status1);CHKERRQ(ierr); 928 ierr = PetscMalloc1(nrqr+1,&s_status2);CHKERRQ(ierr); 929 930 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} 931 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} 932 ierr = PetscFree(s_status1);CHKERRQ(ierr); 933 ierr = PetscFree(s_status2);CHKERRQ(ierr); 934 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 935 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 936 937 /* Now allocate sending buffers for a->j, and send them off */ 938 ierr = PetscMalloc1(nrqr+1,&sbuf_aj);CHKERRQ(ierr); 939 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 940 ierr = PetscMalloc1(j+1,&sbuf_aj[0]);CHKERRQ(ierr); 941 for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 942 943 ierr = PetscMalloc1(nrqr+1,&s_waits3);CHKERRQ(ierr); 944 { 945 946 for (i=0; i<nrqr; i++) { 947 rbuf1_i = rbuf1[i]; 948 sbuf_aj_i = sbuf_aj[i]; 949 ct1 = 2*rbuf1_i[0] + 1; 950 ct2 = 0; 951 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 952 kmax = rbuf1[i][2*j]; 953 for (k=0; k<kmax; k++,ct1++) { 954 row = rbuf1_i[ct1] - rstart; 955 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 956 ncols = nzA + nzB; 957 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 958 959 /* load the column indices for this row into cols */ 960 cols = sbuf_aj_i + ct2; 961 for (l=0; l<nzB; l++) { 962 if ((ctmp = bmap[cworkB[l]]) < cstart) cols[l] = ctmp; 963 else break; 964 } 965 imark = l; 966 for (l=0; l<nzA; l++) {cols[imark+l] = cstart + cworkA[l];} 967 for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]]; 968 ct2 += ncols; 969 } 970 } 971 ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source1[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 972 } 973 } 974 ierr = PetscMalloc2(nrqs+1,&r_status3,nrqr+1,&s_status3);CHKERRQ(ierr); 975 976 /* create col map: global col of C -> local col of submatrices */ 977 #if defined(PETSC_USE_CTABLE) 978 for (i=0; i<ismax; i++) { 979 if (!allcolumns[i]) { 980 ierr = PetscTableCreate(ncol[i]+1,c->Nbs+1,&cmap[i]);CHKERRQ(ierr); 981 982 jmax = ncol[i]; 983 icol_i = icol[i]; 984 cmap_i = cmap[i]; 985 for (j=0; j<jmax; j++) { 986 ierr = PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 987 } 988 } else cmap[i] = NULL; 989 } 990 #else 991 for (i=0; i<ismax; i++) { 992 if (!allcolumns[i]) { 993 ierr = PetscCalloc1(c->Nbs,&cmap[i]);CHKERRQ(ierr); 994 jmax = ncol[i]; 995 icol_i = icol[i]; 996 cmap_i = cmap[i]; 997 for (j=0; j<jmax; j++) cmap_i[icol_i[j]] = j+1; 998 } else cmap[i] = NULL; 999 } 1000 #endif 1001 1002 /* Create lens which is required for MatCreate... */ 1003 for (i=0,j=0; i<ismax; i++) j += nrow[i]; 1004 ierr = PetscMalloc1(ismax,&lens);CHKERRQ(ierr); 1005 1006 if (ismax) { 1007 ierr = PetscCalloc1(j,&lens[0]);CHKERRQ(ierr); 1008 } 1009 for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1]; 1010 1011 /* Update lens from local data */ 1012 for (i=0; i<ismax; i++) { 1013 row2proc_i = row2proc[i]; 1014 jmax = nrow[i]; 1015 if (!allcolumns[i]) cmap_i = cmap[i]; 1016 irow_i = irow[i]; 1017 lens_i = lens[i]; 1018 for (j=0; j<jmax; j++) { 1019 if (allrows[i]) row = j; 1020 else row = irow_i[j]; /* global blocked row of C */ 1021 1022 proc = row2proc_i[j]; 1023 if (proc == rank) { 1024 /* Get indices from matA and then from matB */ 1025 #if defined(PETSC_USE_CTABLE) 1026 PetscInt tt; 1027 #endif 1028 row = row - rstart; 1029 nzA = a_i[row+1] - a_i[row]; 1030 nzB = b_i[row+1] - b_i[row]; 1031 cworkA = a_j + a_i[row]; 1032 cworkB = b_j + b_i[row]; 1033 1034 if (!allcolumns[i]) { 1035 #if defined(PETSC_USE_CTABLE) 1036 for (k=0; k<nzA; k++) { 1037 ierr = PetscTableFind(cmap_i,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr); 1038 if (tt) lens_i[j]++; 1039 } 1040 for (k=0; k<nzB; k++) { 1041 ierr = PetscTableFind(cmap_i,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr); 1042 if (tt) lens_i[j]++; 1043 } 1044 1045 #else 1046 for (k=0; k<nzA; k++) { 1047 if (cmap_i[cstart + cworkA[k]]) lens_i[j]++; 1048 } 1049 for (k=0; k<nzB; k++) { 1050 if (cmap_i[bmap[cworkB[k]]]) lens_i[j]++; 1051 } 1052 #endif 1053 } else { /* allcolumns */ 1054 lens_i[j] = nzA + nzB; 1055 } 1056 } 1057 } 1058 } 1059 1060 /* Create row map: global row of C -> local row of submatrices */ 1061 for (i=0; i<ismax; i++) { 1062 if (!allrows[i]) { 1063 #if defined(PETSC_USE_CTABLE) 1064 ierr = PetscTableCreate(nrow[i]+1,c->Mbs+1,&rmap[i]);CHKERRQ(ierr); 1065 irow_i = irow[i]; 1066 jmax = nrow[i]; 1067 for (j=0; j<jmax; j++) { 1068 if (allrows[i]) { 1069 ierr = PetscTableAdd(rmap[i],j+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1070 } else { 1071 ierr = PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);CHKERRQ(ierr); 1072 } 1073 } 1074 #else 1075 ierr = PetscCalloc1(c->Mbs,&rmap[i]);CHKERRQ(ierr); 1076 rmap_i = rmap[i]; 1077 irow_i = irow[i]; 1078 jmax = nrow[i]; 1079 for (j=0; j<jmax; j++) { 1080 if (allrows[i]) rmap_i[j] = j; 1081 else rmap_i[irow_i[j]] = j; 1082 } 1083 #endif 1084 } else rmap[i] = NULL; 1085 } 1086 1087 /* Update lens from offproc data */ 1088 { 1089 PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i; 1090 1091 ierr = MPI_Waitall(nrqs,r_waits3,r_status3);CHKERRQ(ierr); 1092 for (tmp2=0; tmp2<nrqs; tmp2++) { 1093 sbuf1_i = sbuf1[pa[tmp2]]; 1094 jmax = sbuf1_i[0]; 1095 ct1 = 2*jmax+1; 1096 ct2 = 0; 1097 rbuf2_i = rbuf2[tmp2]; 1098 rbuf3_i = rbuf3[tmp2]; 1099 for (j=1; j<=jmax; j++) { 1100 is_no = sbuf1_i[2*j-1]; 1101 max1 = sbuf1_i[2*j]; 1102 lens_i = lens[is_no]; 1103 if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1104 rmap_i = rmap[is_no]; 1105 for (k=0; k<max1; k++,ct1++) { 1106 if (allrows[is_no]) { 1107 row = sbuf1_i[ct1]; 1108 } else { 1109 #if defined(PETSC_USE_CTABLE) 1110 ierr = PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr); 1111 row--; 1112 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1113 #else 1114 row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */ 1115 #endif 1116 } 1117 max2 = rbuf2_i[ct1]; 1118 for (l=0; l<max2; l++,ct2++) { 1119 if (!allcolumns[is_no]) { 1120 #if defined(PETSC_USE_CTABLE) 1121 ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 1122 #else 1123 tcol = cmap_i[rbuf3_i[ct2]]; 1124 #endif 1125 if (tcol) lens_i[row]++; 1126 } else { /* allcolumns */ 1127 lens_i[row]++; /* lens_i[row] += max2 ? */ 1128 } 1129 } 1130 } 1131 } 1132 } 1133 } 1134 ierr = PetscFree(r_waits3);CHKERRQ(ierr); 1135 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);} 1136 ierr = PetscFree2(r_status3,s_status3);CHKERRQ(ierr); 1137 ierr = PetscFree(s_waits3);CHKERRQ(ierr); 1138 1139 /* Create the submatrices */ 1140 for (i=0; i<ismax; i++) { 1141 PetscInt bs_tmp; 1142 if (ijonly) bs_tmp = 1; 1143 else bs_tmp = bs; 1144 1145 ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); 1146 ierr = MatSetSizes(submats[i],nrow[i]*bs_tmp,ncol[i]*bs_tmp,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1147 1148 ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); 1149 ierr = MatSeqBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); 1150 ierr = MatSeqSBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by SBAIJ routines */ 1151 1152 /* create struct Mat_SubSppt and attached it to submat */ 1153 ierr = PetscNew(&smat_i);CHKERRQ(ierr); 1154 subc = (Mat_SeqBAIJ*)submats[i]->data; 1155 subc->submatis1 = smat_i; 1156 1157 smat_i->destroy = submats[i]->ops->destroy; 1158 submats[i]->ops->destroy = MatDestroySubMatrix_SeqBAIJ; 1159 submats[i]->factortype = C->factortype; 1160 1161 smat_i->id = i; 1162 smat_i->nrqs = nrqs; 1163 smat_i->nrqr = nrqr; 1164 smat_i->rbuf1 = rbuf1; 1165 smat_i->rbuf2 = rbuf2; 1166 smat_i->rbuf3 = rbuf3; 1167 smat_i->sbuf2 = sbuf2; 1168 smat_i->req_source2 = req_source2; 1169 1170 smat_i->sbuf1 = sbuf1; 1171 smat_i->ptr = ptr; 1172 smat_i->tmp = tmp; 1173 smat_i->ctr = ctr; 1174 1175 smat_i->pa = pa; 1176 smat_i->req_size = req_size; 1177 smat_i->req_source1 = req_source1; 1178 1179 smat_i->allcolumns = allcolumns[i]; 1180 smat_i->allrows = allrows[i]; 1181 smat_i->singleis = PETSC_FALSE; 1182 smat_i->row2proc = row2proc[i]; 1183 smat_i->rmap = rmap[i]; 1184 smat_i->cmap = cmap[i]; 1185 } 1186 1187 if (!ismax) { /* Create dummy submats[0] for reuse struct subc */ 1188 ierr = MatCreate(PETSC_COMM_SELF,&submats[0]);CHKERRQ(ierr); 1189 ierr = MatSetSizes(submats[0],0,0,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1190 ierr = MatSetType(submats[0],MATDUMMY);CHKERRQ(ierr); 1191 1192 /* create struct Mat_SubSppt and attached it to submat */ 1193 ierr = PetscNewLog(submats[0],&smat_i);CHKERRQ(ierr); 1194 submats[0]->data = (void*)smat_i; 1195 1196 smat_i->destroy = submats[0]->ops->destroy; 1197 submats[0]->ops->destroy = MatDestroySubMatrix_Dummy; 1198 submats[0]->factortype = C->factortype; 1199 1200 smat_i->id = 0; 1201 smat_i->nrqs = nrqs; 1202 smat_i->nrqr = nrqr; 1203 smat_i->rbuf1 = rbuf1; 1204 smat_i->rbuf2 = rbuf2; 1205 smat_i->rbuf3 = rbuf3; 1206 smat_i->sbuf2 = sbuf2; 1207 smat_i->req_source2 = req_source2; 1208 1209 smat_i->sbuf1 = sbuf1; 1210 smat_i->ptr = ptr; 1211 smat_i->tmp = tmp; 1212 smat_i->ctr = ctr; 1213 1214 smat_i->pa = pa; 1215 smat_i->req_size = req_size; 1216 smat_i->req_source1 = req_source1; 1217 1218 smat_i->allcolumns = PETSC_FALSE; 1219 smat_i->singleis = PETSC_FALSE; 1220 smat_i->row2proc = NULL; 1221 smat_i->rmap = NULL; 1222 smat_i->cmap = NULL; 1223 } 1224 1225 1226 if (ismax) {ierr = PetscFree(lens[0]);CHKERRQ(ierr);} 1227 ierr = PetscFree(lens);CHKERRQ(ierr); 1228 ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); 1229 ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); 1230 1231 } /* endof scall == MAT_INITIAL_MATRIX */ 1232 1233 /* Post recv matrix values */ 1234 if (!ijonly) { 1235 ierr = PetscObjectGetNewTag((PetscObject)C,&tag4);CHKERRQ(ierr); 1236 ierr = PetscMalloc1(nrqs+1,&rbuf4);CHKERRQ(ierr); 1237 ierr = PetscMalloc1(nrqs+1,&r_waits4);CHKERRQ(ierr); 1238 ierr = PetscMalloc1(nrqs+1,&r_status4);CHKERRQ(ierr); 1239 ierr = PetscMalloc1(nrqr+1,&s_status4);CHKERRQ(ierr); 1240 for (i=0; i<nrqs; ++i) { 1241 ierr = PetscMalloc1(rbuf2[i][0]*bs2,&rbuf4[i]);CHKERRQ(ierr); 1242 ierr = MPI_Irecv(rbuf4[i],rbuf2[i][0]*bs2,MPIU_SCALAR,req_source2[i],tag4,comm,r_waits4+i);CHKERRQ(ierr); 1243 } 1244 1245 /* Allocate sending buffers for a->a, and send them off */ 1246 ierr = PetscMalloc1(nrqr+1,&sbuf_aa);CHKERRQ(ierr); 1247 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 1248 1249 ierr = PetscMalloc1((j+1)*bs2,&sbuf_aa[0]);CHKERRQ(ierr); 1250 for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2; 1251 1252 ierr = PetscMalloc1(nrqr+1,&s_waits4);CHKERRQ(ierr); 1253 1254 for (i=0; i<nrqr; i++) { 1255 rbuf1_i = rbuf1[i]; 1256 sbuf_aa_i = sbuf_aa[i]; 1257 ct1 = 2*rbuf1_i[0]+1; 1258 ct2 = 0; 1259 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 1260 kmax = rbuf1_i[2*j]; 1261 for (k=0; k<kmax; k++,ct1++) { 1262 row = rbuf1_i[ct1] - rstart; 1263 nzA = a_i[row+1] - a_i[row]; 1264 nzB = b_i[row+1] - b_i[row]; 1265 ncols = nzA + nzB; 1266 cworkB = b_j + b_i[row]; 1267 vworkA = a_a + a_i[row]*bs2; 1268 vworkB = b_a + b_i[row]*bs2; 1269 1270 /* load the column values for this row into vals*/ 1271 vals = sbuf_aa_i+ct2*bs2; 1272 for (l=0; l<nzB; l++) { 1273 if ((bmap[cworkB[l]]) < cstart) { 1274 ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1275 } else break; 1276 } 1277 imark = l; 1278 for (l=0; l<nzA; l++) { 1279 ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1280 } 1281 for (l=imark; l<nzB; l++) { 1282 ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1283 } 1284 1285 ct2 += ncols; 1286 } 1287 } 1288 ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_SCALAR,req_source1[i],tag4,comm,s_waits4+i);CHKERRQ(ierr); 1289 } 1290 } 1291 1292 /* Assemble the matrices */ 1293 /* First assemble the local rows */ 1294 for (i=0; i<ismax; i++) { 1295 row2proc_i = row2proc[i]; 1296 subc = (Mat_SeqBAIJ*)submats[i]->data; 1297 imat_ilen = subc->ilen; 1298 imat_j = subc->j; 1299 imat_i = subc->i; 1300 imat_a = subc->a; 1301 1302 if (!allcolumns[i]) cmap_i = cmap[i]; 1303 rmap_i = rmap[i]; 1304 irow_i = irow[i]; 1305 jmax = nrow[i]; 1306 for (j=0; j<jmax; j++) { 1307 if (allrows[i]) row = j; 1308 else row = irow_i[j]; 1309 proc = row2proc_i[j]; 1310 1311 if (proc == rank) { 1312 1313 row = row - rstart; 1314 nzA = a_i[row+1] - a_i[row]; 1315 nzB = b_i[row+1] - b_i[row]; 1316 cworkA = a_j + a_i[row]; 1317 cworkB = b_j + b_i[row]; 1318 if (!ijonly) { 1319 vworkA = a_a + a_i[row]*bs2; 1320 vworkB = b_a + b_i[row]*bs2; 1321 } 1322 1323 if (allrows[i]) { 1324 row = row+rstart; 1325 } else { 1326 #if defined(PETSC_USE_CTABLE) 1327 ierr = PetscTableFind(rmap_i,row+rstart+1,&row);CHKERRQ(ierr); 1328 row--; 1329 1330 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1331 #else 1332 row = rmap_i[row + rstart]; 1333 #endif 1334 } 1335 mat_i = imat_i[row]; 1336 if (!ijonly) mat_a = imat_a + mat_i*bs2; 1337 mat_j = imat_j + mat_i; 1338 ilen = imat_ilen[row]; 1339 1340 /* load the column indices for this row into cols*/ 1341 if (!allcolumns[i]) { 1342 for (l=0; l<nzB; l++) { 1343 if ((ctmp = bmap[cworkB[l]]) < cstart) { 1344 #if defined(PETSC_USE_CTABLE) 1345 ierr = PetscTableFind(cmap_i,ctmp+1,&tcol);CHKERRQ(ierr); 1346 if (tcol) { 1347 #else 1348 if ((tcol = cmap_i[ctmp])) { 1349 #endif 1350 *mat_j++ = tcol - 1; 1351 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1352 mat_a += bs2; 1353 ilen++; 1354 } 1355 } else break; 1356 } 1357 imark = l; 1358 for (l=0; l<nzA; l++) { 1359 #if defined(PETSC_USE_CTABLE) 1360 ierr = PetscTableFind(cmap_i,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr); 1361 if (tcol) { 1362 #else 1363 if ((tcol = cmap_i[cstart + cworkA[l]])) { 1364 #endif 1365 *mat_j++ = tcol - 1; 1366 if (!ijonly) { 1367 ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1368 mat_a += bs2; 1369 } 1370 ilen++; 1371 } 1372 } 1373 for (l=imark; l<nzB; l++) { 1374 #if defined(PETSC_USE_CTABLE) 1375 ierr = PetscTableFind(cmap_i,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr); 1376 if (tcol) { 1377 #else 1378 if ((tcol = cmap_i[bmap[cworkB[l]]])) { 1379 #endif 1380 *mat_j++ = tcol - 1; 1381 if (!ijonly) { 1382 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1383 mat_a += bs2; 1384 } 1385 ilen++; 1386 } 1387 } 1388 } else { /* allcolumns */ 1389 for (l=0; l<nzB; l++) { 1390 if ((ctmp = bmap[cworkB[l]]) < cstart) { 1391 *mat_j++ = ctmp; 1392 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1393 mat_a += bs2; 1394 ilen++; 1395 } else break; 1396 } 1397 imark = l; 1398 for (l=0; l<nzA; l++) { 1399 *mat_j++ = cstart+cworkA[l]; 1400 if (!ijonly) { 1401 ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1402 mat_a += bs2; 1403 } 1404 ilen++; 1405 } 1406 for (l=imark; l<nzB; l++) { 1407 *mat_j++ = bmap[cworkB[l]]; 1408 if (!ijonly) { 1409 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1410 mat_a += bs2; 1411 } 1412 ilen++; 1413 } 1414 } 1415 imat_ilen[row] = ilen; 1416 } 1417 } 1418 } 1419 1420 /* Now assemble the off proc rows */ 1421 if (!ijonly) { 1422 ierr = MPI_Waitall(nrqs,r_waits4,r_status4);CHKERRQ(ierr); 1423 } 1424 for (tmp2=0; tmp2<nrqs; tmp2++) { 1425 sbuf1_i = sbuf1[pa[tmp2]]; 1426 jmax = sbuf1_i[0]; 1427 ct1 = 2*jmax + 1; 1428 ct2 = 0; 1429 rbuf2_i = rbuf2[tmp2]; 1430 rbuf3_i = rbuf3[tmp2]; 1431 if (!ijonly) rbuf4_i = rbuf4[tmp2]; 1432 for (j=1; j<=jmax; j++) { 1433 is_no = sbuf1_i[2*j-1]; 1434 rmap_i = rmap[is_no]; 1435 if (!allcolumns[is_no]) cmap_i = cmap[is_no]; 1436 subc = (Mat_SeqBAIJ*)submats[is_no]->data; 1437 imat_ilen = subc->ilen; 1438 imat_j = subc->j; 1439 imat_i = subc->i; 1440 if (!ijonly) imat_a = subc->a; 1441 max1 = sbuf1_i[2*j]; 1442 for (k=0; k<max1; k++,ct1++) { /* for each recved block row */ 1443 row = sbuf1_i[ct1]; 1444 1445 if (allrows[is_no]) { 1446 row = sbuf1_i[ct1]; 1447 } else { 1448 #if defined(PETSC_USE_CTABLE) 1449 ierr = PetscTableFind(rmap_i,row+1,&row);CHKERRQ(ierr); 1450 row--; 1451 if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); 1452 #else 1453 row = rmap_i[row]; 1454 #endif 1455 } 1456 ilen = imat_ilen[row]; 1457 mat_i = imat_i[row]; 1458 if (!ijonly) mat_a = imat_a + mat_i*bs2; 1459 mat_j = imat_j + mat_i; 1460 max2 = rbuf2_i[ct1]; 1461 if (!allcolumns[is_no]) { 1462 for (l=0; l<max2; l++,ct2++) { 1463 #if defined(PETSC_USE_CTABLE) 1464 ierr = PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 1465 #else 1466 tcol = cmap_i[rbuf3_i[ct2]]; 1467 #endif 1468 if (tcol) { 1469 *mat_j++ = tcol - 1; 1470 if (!ijonly) { 1471 ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1472 mat_a += bs2; 1473 } 1474 ilen++; 1475 } 1476 } 1477 } else { /* allcolumns */ 1478 for (l=0; l<max2; l++,ct2++) { 1479 *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */ 1480 if (!ijonly) { 1481 ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 mat_a += bs2; 1483 } 1484 ilen++; 1485 } 1486 } 1487 imat_ilen[row] = ilen; 1488 } 1489 } 1490 } 1491 1492 if (!iscsorted) { /* sort column indices of the rows */ 1493 MatScalar *work; 1494 1495 ierr = PetscMalloc1(bs2,&work);CHKERRQ(ierr); 1496 for (i=0; i<ismax; i++) { 1497 subc = (Mat_SeqBAIJ*)submats[i]->data; 1498 imat_ilen = subc->ilen; 1499 imat_j = subc->j; 1500 imat_i = subc->i; 1501 if (!ijonly) imat_a = subc->a; 1502 if (allcolumns[i]) continue; 1503 1504 jmax = nrow[i]; 1505 for (j=0; j<jmax; j++) { 1506 mat_i = imat_i[j]; 1507 mat_j = imat_j + mat_i; 1508 ilen = imat_ilen[j]; 1509 if (ijonly) { 1510 ierr = PetscSortInt(ilen,mat_j);CHKERRQ(ierr); 1511 } else { 1512 mat_a = imat_a + mat_i*bs2; 1513 ierr = PetscSortIntWithDataArray(ilen,mat_j,mat_a,bs2*sizeof(MatScalar),work);CHKERRQ(ierr); 1514 } 1515 } 1516 } 1517 ierr = PetscFree(work);CHKERRQ(ierr); 1518 } 1519 1520 if (!ijonly) { 1521 ierr = PetscFree(r_status4);CHKERRQ(ierr); 1522 ierr = PetscFree(r_waits4);CHKERRQ(ierr); 1523 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} 1524 ierr = PetscFree(s_waits4);CHKERRQ(ierr); 1525 ierr = PetscFree(s_status4);CHKERRQ(ierr); 1526 } 1527 1528 /* Restore the indices */ 1529 for (i=0; i<ismax; i++) { 1530 if (!allrows[i]) { 1531 ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); 1532 } 1533 if (!allcolumns[i]) { 1534 ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); 1535 } 1536 } 1537 1538 for (i=0; i<ismax; i++) { 1539 ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1540 ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1541 } 1542 1543 iirow = (PetscInt**) irow; /* required because PetscFreeN() cannot work on const */ 1544 iicol = (PetscInt**) icol; 1545 ierr = PetscFree5(iirow,iicol,nrow,ncol,issorted);CHKERRQ(ierr); 1546 ierr = PetscFree5(row2proc,cmap,rmap,allcolumns,allrows);CHKERRQ(ierr); 1547 1548 if (!ijonly) { 1549 ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr); 1550 ierr = PetscFree(sbuf_aa);CHKERRQ(ierr); 1551 1552 for (i=0; i<nrqs; ++i) { 1553 ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr); 1554 } 1555 ierr = PetscFree(rbuf4);CHKERRQ(ierr); 1556 } 1557 c->ijonly = PETSC_FALSE; /* set back to the default */ 1558 PetscFunctionReturn(0); 1559 } 1560