1 2 /* 3 Routines to compute overlapping regions of a parallel MPI matrix. 4 Used for finding submatrices that were shared across processors. 5 */ 6 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 7 #include <petscbt.h> 8 9 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat,PetscInt,IS*); 10 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat,PetscInt*,PetscInt,PetscInt*,PetscBT*); 11 12 /* -------------------------------------------------------------------------*/ 13 /* This code is modified from MatGetSubMatrices_MPIBAIJ_local() */ 14 #if defined (PETSC_USE_CTABLE) 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscGetProc_ijonly" 17 PetscErrorCode PetscGetProc_ijonly(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank) 18 { 19 PetscInt nGlobalNd = proc_gnode[size]; 20 PetscMPIInt fproc = PetscMPIIntCast( (PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5))); 21 22 PetscFunctionBegin; 23 if (fproc > size) fproc = size; 24 while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) { 25 if (row < proc_gnode[fproc]) fproc--; 26 else fproc++; 27 } 28 *rank = fproc; 29 PetscFunctionReturn(0); 30 } 31 #endif 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local_ijonly" 35 PetscErrorCode MatGetSubMatrices_MPIBAIJ_local_ijonly(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) 36 { 37 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 38 Mat A = c->A; 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat; 40 const PetscInt **irow,**icol,*irow_i; 41 PetscInt *nrow,*ncol,*w3,*w4,start; 42 PetscErrorCode ierr; 43 PetscMPIInt size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc; 44 PetscInt **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row; 45 PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol; 46 PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; 47 PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; 48 PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; 49 PetscInt *a_j=a->j,*b_j=b->j,*cworkA,*cworkB; //bs=C->rmap->bs,bs2=c->bs2 50 PetscInt cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark; 51 PetscInt *bmap = c->garray,ctmp,rstart=c->rstartbs; 52 MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3,*s_waits3; 53 //MPI_Request *r_waits4,*s_waits4; 54 MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2,*r_status3; 55 //MPI_Status *r_status4,*s_status4; 56 MPI_Comm comm; 57 //MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB; 58 //MatScalar *a_a=a->a,*b_a=b->a; 59 PetscBool flag; 60 PetscMPIInt *onodes1,*olengths1; 61 62 #if defined (PETSC_USE_CTABLE) 63 PetscInt tt; 64 PetscTable *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1; 65 #else 66 PetscInt **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs; 67 #endif 68 69 PetscFunctionBegin; 70 comm = ((PetscObject)C)->comm; 71 tag0 = ((PetscObject)C)->tag; 72 size = c->size; 73 rank = c->rank; 74 75 /* Get some new tags to keep the communication clean */ 76 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 77 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 78 ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); 79 80 #if defined(PETSC_USE_CTABLE) 81 ierr = PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);CHKERRQ(ierr); 82 #else 83 ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,Mbs+1,PetscInt,&rtable);CHKERRQ(ierr); 84 /* Create hash table for the mapping :row -> proc*/ 85 for (i=0,j=0; i<size; i++) { 86 jmax = c->rowners[i+1]; 87 for (; j<jmax; j++) { 88 rtable[j] = i; 89 } 90 } 91 #endif 92 93 for (i=0; i<ismax; i++) { 94 ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); 95 ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); 97 ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); 98 } 99 100 /* evaluate communication - mesg to who,length of mesg,and buffer space 101 required. Based on this, buffers are allocated, and data copied into them*/ 102 ierr = PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr); 103 ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 104 ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 105 ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); 106 for (i=0; i<ismax; i++) { 107 ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 108 jmax = nrow[i]; 109 irow_i = irow[i]; 110 for (j=0; j<jmax; j++) { 111 row = irow_i[j]; 112 #if defined (PETSC_USE_CTABLE) 113 ierr = PetscGetProc_ijonly(row,size,c->rangebs,&proc);CHKERRQ(ierr); 114 #else 115 proc = rtable[row]; 116 #endif 117 w4[proc]++; 118 } 119 for (j=0; j<size; j++) { 120 if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 121 } 122 } 123 124 nrqs = 0; /* no of outgoing messages */ 125 msz = 0; /* total mesg length for all proc */ 126 w1[rank] = 0; /* no mesg sent to intself */ 127 w3[rank] = 0; 128 for (i=0; i<size; i++) { 129 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 130 } 131 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ 132 for (i=0,j=0; i<size; i++) { 133 if (w1[i]) { pa[j] = i; j++; } 134 } 135 136 /* Each message would have a header = 1 + 2*(no of IS) + data */ 137 for (i=0; i<nrqs; i++) { 138 j = pa[i]; 139 w1[j] += w2[j] + 2* w3[j]; 140 msz += w1[j]; 141 } 142 143 /* Determine the number of messages to expect, their lengths, from from-ids */ 144 ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 145 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 146 147 /* Now post the Irecvs corresponding to these messages */ 148 ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); 149 150 ierr = PetscFree(onodes1);CHKERRQ(ierr); 151 ierr = PetscFree(olengths1);CHKERRQ(ierr); 152 153 /* Allocate Memory for outgoing messages */ 154 ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr); 155 ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); 156 ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); 157 { 158 PetscInt *iptr = tmp,ict = 0; 159 for (i=0; i<nrqs; i++) { 160 j = pa[i]; 161 iptr += ict; 162 sbuf1[j] = iptr; 163 ict = w1[j]; 164 } 165 } 166 167 /* Form the outgoing messages */ 168 /* Initialise the header space */ 169 for (i=0; i<nrqs; i++) { 170 j = pa[i]; 171 sbuf1[j][0] = 0; 172 ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 173 ptr[j] = sbuf1[j] + 2*w3[j] + 1; 174 } 175 176 /* Parse the isrow and copy data into outbuf */ 177 for (i=0; i<ismax; i++) { 178 ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 179 irow_i = irow[i]; 180 jmax = nrow[i]; 181 for (j=0; j<jmax; j++) { /* parse the indices of each IS */ 182 row = irow_i[j]; 183 #if defined (PETSC_USE_CTABLE) 184 ierr = PetscGetProc_ijonly(row,size,c->rangebs,&proc);CHKERRQ(ierr); 185 #else 186 proc = rtable[row]; 187 #endif 188 if (proc != rank) { /* copy to the outgoing buf*/ 189 ctr[proc]++; 190 *ptr[proc] = row; 191 ptr[proc]++; 192 } 193 } 194 /* Update the headers for the current IS */ 195 for (j=0; j<size; j++) { /* Can Optimise this loop too */ 196 if ((ctr_j = ctr[j])) { 197 sbuf1_j = sbuf1[j]; 198 k = ++sbuf1_j[0]; 199 sbuf1_j[2*k] = ctr_j; 200 sbuf1_j[2*k-1] = i; 201 } 202 } 203 } 204 205 /* Now post the sends */ 206 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 207 for (i=0; i<nrqs; ++i) { 208 j = pa[i]; 209 ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); 210 } 211 212 /* Post Recieves to capture the buffer size */ 213 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); 214 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);CHKERRQ(ierr); 215 rbuf2[0] = tmp + msz; 216 for (i=1; i<nrqs; ++i) { 217 j = pa[i]; 218 rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 219 } 220 for (i=0; i<nrqs; ++i) { 221 j = pa[i]; 222 ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); 223 } 224 225 /* Send to other procs the buf size they should allocate */ 226 227 /* Receive messages*/ 228 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 229 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); 230 ierr = PetscMalloc3(nrqr+1,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);CHKERRQ(ierr); 231 { 232 Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data; 233 PetscInt *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; 234 235 for (i=0; i<nrqr; ++i) { 236 ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); 237 req_size[idex] = 0; 238 rbuf1_i = rbuf1[idex]; 239 start = 2*rbuf1_i[0] + 1; 240 ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); 241 ierr = PetscMalloc(end*sizeof(PetscInt),&sbuf2[idex]);CHKERRQ(ierr); 242 sbuf2_i = sbuf2[idex]; 243 for (j=start; j<end; j++) { 244 id = rbuf1_i[j] - rstart; 245 ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id]; 246 sbuf2_i[j] = ncols; 247 req_size[idex] += ncols; 248 } 249 req_source[idex] = r_status1[i].MPI_SOURCE; 250 /* form the header */ 251 sbuf2_i[0] = req_size[idex]; 252 for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; } 253 ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);CHKERRQ(ierr); 254 } 255 } 256 ierr = PetscFree(r_status1);CHKERRQ(ierr); 257 ierr = PetscFree(r_waits1);CHKERRQ(ierr); 258 259 /* recv buffer sizes */ 260 /* Receive messages*/ 261 262 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);CHKERRQ(ierr); 263 //ierr = PetscMalloc((nrqs+1)*sizeof(MatScalar*),&rbuf4);CHKERRQ(ierr); 264 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr); 265 //ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr); 266 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); 267 268 for (i=0; i<nrqs; ++i) { 269 ierr = MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);CHKERRQ(ierr); 270 ierr = PetscMalloc(rbuf2[idex][0]*sizeof(PetscInt),&rbuf3[idex]);CHKERRQ(ierr); 271 //ierr = PetscMalloc(rbuf2[idex][0]*bs2*sizeof(MatScalar),&rbuf4[idex]);CHKERRQ(ierr); 272 ierr = MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);CHKERRQ(ierr); 273 //ierr = MPI_Irecv(rbuf4[idex],rbuf2[idex][0]*bs2,MPIU_MATSCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);CHKERRQ(ierr); 274 } 275 ierr = PetscFree(r_status2);CHKERRQ(ierr); 276 ierr = PetscFree(r_waits2);CHKERRQ(ierr); 277 278 /* Wait on sends1 and sends2 */ 279 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); 280 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); 281 282 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} 283 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} 284 ierr = PetscFree(s_status1);CHKERRQ(ierr); 285 ierr = PetscFree(s_status2);CHKERRQ(ierr); 286 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 287 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 288 289 /* Now allocate buffers for a->j, and send them off */ 290 ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr); 291 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 292 ierr = PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);CHKERRQ(ierr); 293 for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 294 295 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);CHKERRQ(ierr); 296 { 297 for (i=0; i<nrqr; i++) { 298 rbuf1_i = rbuf1[i]; 299 sbuf_aj_i = sbuf_aj[i]; 300 ct1 = 2*rbuf1_i[0] + 1; 301 ct2 = 0; 302 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 303 kmax = rbuf1[i][2*j]; 304 for (k=0; k<kmax; k++,ct1++) { 305 row = rbuf1_i[ct1] - rstart; 306 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 307 ncols = nzA + nzB; 308 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 309 310 /* load the column indices for this row into cols*/ 311 cols = sbuf_aj_i + ct2; 312 for (l=0; l<nzB; l++) { 313 if ((ctmp = bmap[cworkB[l]]) < cstart) cols[l] = ctmp; 314 else break; 315 } 316 imark = l; 317 for (l=0; l<nzA; l++) cols[imark+l] = cstart + cworkA[l]; 318 for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]]; 319 ct2 += ncols; 320 } 321 } 322 ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr); 323 } 324 } 325 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);CHKERRQ(ierr); 326 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);CHKERRQ(ierr); 327 328 #if defined(MV) 329 /* Allocate buffers for a->a, and send them off */ 330 ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar *),&sbuf_aa);CHKERRQ(ierr); 331 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 332 ierr = PetscMalloc((j+1)*bs2*sizeof(MatScalar),&sbuf_aa[0]);CHKERRQ(ierr); 333 for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2; 334 335 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);CHKERRQ(ierr); 336 { 337 for (i=0; i<nrqr; i++) { 338 rbuf1_i = rbuf1[i]; 339 sbuf_aa_i = sbuf_aa[i]; 340 ct1 = 2*rbuf1_i[0]+1; 341 ct2 = 0; 342 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 343 kmax = rbuf1_i[2*j]; 344 for (k=0; k<kmax; k++,ct1++) { 345 row = rbuf1_i[ct1] - rstart; 346 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 347 ncols = nzA + nzB; 348 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 349 vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2; 350 351 /* load the column values for this row into vals*/ 352 vals = sbuf_aa_i+ct2*bs2; 353 for (l=0; l<nzB; l++) { 354 if ((bmap[cworkB[l]]) < cstart) { 355 ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 356 } 357 else break; 358 } 359 imark = l; 360 for (l=0; l<nzA; l++) { 361 ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 362 } 363 for (l=imark; l<nzB; l++) { 364 ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 365 } 366 ct2 += ncols; 367 } 368 } 369 ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_MATSCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); 370 } 371 } 372 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); 373 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); 374 #endif 375 ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); 376 ierr = PetscFree(rbuf1);CHKERRQ(ierr); 377 378 /* Form the matrix */ 379 /* create col map */ 380 { 381 const PetscInt *icol_i; 382 #if defined (PETSC_USE_CTABLE) 383 /* Create row map*/ 384 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&colmaps);CHKERRQ(ierr); 385 for (i=0; i<ismax; i++) { 386 ierr = PetscTableCreate(ncol[i]+1,&colmaps[i]);CHKERRQ(ierr); 387 } 388 #else 389 ierr = PetscMalloc(ismax*sizeof(PetscInt*),&cmap);CHKERRQ(ierr); 390 ierr = PetscMalloc(ismax*c->Nbs*sizeof(PetscInt),&cmap[0]);CHKERRQ(ierr); 391 for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + c->Nbs; } 392 #endif 393 for (i=0; i<ismax; i++) { 394 jmax = ncol[i]; 395 icol_i = icol[i]; 396 #if defined (PETSC_USE_CTABLE) 397 lcol1_gcol1 = colmaps[i]; 398 for (j=0; j<jmax; j++) { 399 ierr = PetscTableAdd(lcol1_gcol1,icol_i[j]+1,j+1);CHKERRQ(ierr); 400 } 401 #else 402 cmap_i = cmap[i]; 403 for (j=0; j<jmax; j++) { 404 cmap_i[icol_i[j]] = j+1; 405 } 406 #endif 407 } 408 } 409 410 /* Create lens which is required for MatCreate... */ 411 for (i=0,j=0; i<ismax; i++) { j += nrow[i]; } 412 ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ j*sizeof(PetscInt),&lens);CHKERRQ(ierr); 413 lens[0] = (PetscInt*)(lens + ismax); 414 ierr = PetscMemzero(lens[0],j*sizeof(PetscInt));CHKERRQ(ierr); 415 for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; } 416 417 /* Update lens from local data */ 418 for (i=0; i<ismax; i++) { 419 jmax = nrow[i]; 420 #if defined (PETSC_USE_CTABLE) 421 lcol1_gcol1 = colmaps[i]; 422 #else 423 cmap_i = cmap[i]; 424 #endif 425 irow_i = irow[i]; 426 lens_i = lens[i]; 427 for (j=0; j<jmax; j++) { 428 row = irow_i[j]; 429 #if defined (PETSC_USE_CTABLE) 430 ierr = PetscGetProc_ijonly(row,size,c->rangebs,&proc);CHKERRQ(ierr); 431 #else 432 proc = rtable[row]; 433 #endif 434 if (proc == rank) { 435 /* Get indices from matA and then from matB */ 436 row = row - rstart; 437 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 438 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 439 #if defined (PETSC_USE_CTABLE) 440 for (k=0; k<nzA; k++) { 441 ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr); 442 if (tt) { lens_i[j]++; } 443 } 444 for (k=0; k<nzB; k++) { 445 ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr); 446 if (tt) { lens_i[j]++; } 447 } 448 #else 449 for (k=0; k<nzA; k++) { 450 if (cmap_i[cstart + cworkA[k]]) { lens_i[j]++; } 451 } 452 for (k=0; k<nzB; k++) { 453 if (cmap_i[bmap[cworkB[k]]]) { lens_i[j]++; } 454 } 455 #endif 456 } 457 } 458 } 459 #if defined (PETSC_USE_CTABLE) 460 /* Create row map*/ 461 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&rowmaps);CHKERRQ(ierr); 462 for (i=0; i<ismax; i++){ 463 ierr = PetscTableCreate(nrow[i]+1,&rowmaps[i]);CHKERRQ(ierr); 464 } 465 #else 466 /* Create row map*/ 467 ierr = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ ismax*Mbs*sizeof(PetscInt),&rmap);CHKERRQ(ierr); 468 rmap[0] = (PetscInt*)(rmap + ismax); 469 ierr = PetscMemzero(rmap[0],ismax*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 470 for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + Mbs;} 471 #endif 472 for (i=0; i<ismax; i++) { 473 irow_i = irow[i]; 474 jmax = nrow[i]; 475 #if defined (PETSC_USE_CTABLE) 476 lrow1_grow1 = rowmaps[i]; 477 for (j=0; j<jmax; j++) { 478 ierr = PetscTableAdd(lrow1_grow1,irow_i[j]+1,j+1);CHKERRQ(ierr); 479 } 480 #else 481 rmap_i = rmap[i]; 482 for (j=0; j<jmax; j++) { 483 rmap_i[irow_i[j]] = j; 484 } 485 #endif 486 } 487 488 /* Update lens from offproc data */ 489 { 490 PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i; 491 PetscMPIInt ii; 492 493 for (tmp2=0; tmp2<nrqs; tmp2++) { 494 ierr = MPI_Waitany(nrqs,r_waits3,&ii,r_status3+tmp2);CHKERRQ(ierr); 495 idex = pa[ii]; 496 sbuf1_i = sbuf1[idex]; 497 jmax = sbuf1_i[0]; 498 ct1 = 2*jmax+1; 499 ct2 = 0; 500 rbuf2_i = rbuf2[ii]; 501 rbuf3_i = rbuf3[ii]; 502 for (j=1; j<=jmax; j++) { 503 is_no = sbuf1_i[2*j-1]; 504 max1 = sbuf1_i[2*j]; 505 lens_i = lens[is_no]; 506 #if defined (PETSC_USE_CTABLE) 507 lcol1_gcol1 = colmaps[is_no]; 508 lrow1_grow1 = rowmaps[is_no]; 509 #else 510 cmap_i = cmap[is_no]; 511 rmap_i = rmap[is_no]; 512 #endif 513 for (k=0; k<max1; k++,ct1++) { 514 #if defined (PETSC_USE_CTABLE) 515 ierr = PetscTableFind(lrow1_grow1,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr); 516 row--; 517 if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); } 518 #else 519 row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */ 520 #endif 521 max2 = rbuf2_i[ct1]; 522 for (l=0; l<max2; l++,ct2++) { 523 #if defined (PETSC_USE_CTABLE) 524 ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tt);CHKERRQ(ierr); 525 if (tt) { 526 lens_i[row]++; 527 } 528 #else 529 if (cmap_i[rbuf3_i[ct2]]) { 530 lens_i[row]++; 531 } 532 #endif 533 } 534 } 535 } 536 } 537 } 538 ierr = PetscFree(r_status3);CHKERRQ(ierr); 539 ierr = PetscFree(r_waits3);CHKERRQ(ierr); 540 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);} 541 ierr = PetscFree(s_status3);CHKERRQ(ierr); 542 ierr = PetscFree(s_waits3);CHKERRQ(ierr); 543 544 /* Create the submatrices */ 545 if (scall == MAT_REUSE_MATRIX) { 546 /* 547 Assumes new rows are same length as the old rows, hence bug! 548 */ 549 for (i=0; i<ismax; i++) { 550 mat = (Mat_SeqBAIJ *)(submats[i]->data); 551 //if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 552 if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 553 ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 554 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros"); 555 /* Initial matrix as if empty */ 556 ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr); 557 submats[i]->factortype = C->factortype; 558 } 559 } else { 560 for (i=0; i<ismax; i++) { 561 ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); 562 //ierr = MatSetSizes(submats[i],nrow[i]*bs,ncol[i]*bs,nrow[i]*bs,ncol[i]*bs);CHKERRQ(ierr); 563 ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr); 564 ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); 565 ierr = MatSeqBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); 566 //ierr = MatSeqBAIJSetPreallocation(submats[i],1,0,lens[i]);CHKERRQ(ierr); 567 ierr = MatSeqSBAIJSetPreallocation(submats[i],1,0,lens[i]);CHKERRQ(ierr); 568 //ierr = MatSeqSBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by MatGetSubMatrices_MPISBAIJ()*/ 569 } 570 } 571 572 /* Assemble the matrices */ 573 /* First assemble the local rows */ 574 { 575 PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i; 576 //MatScalar *imat_a; 577 578 for (i=0; i<ismax; i++) { 579 mat = (Mat_SeqBAIJ*)submats[i]->data; 580 imat_ilen = mat->ilen; 581 imat_j = mat->j; 582 imat_i = mat->i; 583 //imat_a = mat->a; 584 585 #if defined (PETSC_USE_CTABLE) 586 lcol1_gcol1 = colmaps[i]; 587 lrow1_grow1 = rowmaps[i]; 588 #else 589 cmap_i = cmap[i]; 590 rmap_i = rmap[i]; 591 #endif 592 irow_i = irow[i]; 593 jmax = nrow[i]; 594 for (j=0; j<jmax; j++) { 595 row = irow_i[j]; 596 #if defined (PETSC_USE_CTABLE) 597 ierr = PetscGetProc_ijonly(row,size,c->rangebs,&proc);CHKERRQ(ierr); 598 #else 599 proc = rtable[row]; 600 #endif 601 if (proc == rank) { 602 row = row - rstart; 603 nzA = a_i[row+1] - a_i[row]; 604 nzB = b_i[row+1] - b_i[row]; 605 cworkA = a_j + a_i[row]; 606 cworkB = b_j + b_i[row]; 607 //vworkA = a_a + a_i[row]*bs2; 608 //vworkB = b_a + b_i[row]*bs2; 609 #if defined (PETSC_USE_CTABLE) 610 ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr); 611 row--; 612 if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); } 613 #else 614 row = rmap_i[row + rstart]; 615 #endif 616 mat_i = imat_i[row]; 617 //mat_a = imat_a + mat_i*bs2; 618 mat_j = imat_j + mat_i; 619 ilen_row = imat_ilen[row]; 620 621 /* load the column indices for this row into cols*/ 622 for (l=0; l<nzB; l++) { 623 if ((ctmp = bmap[cworkB[l]]) < cstart) { 624 #if defined (PETSC_USE_CTABLE) 625 ierr = PetscTableFind(lcol1_gcol1,ctmp+1,&tcol);CHKERRQ(ierr); 626 if (tcol) { 627 #else 628 if ((tcol = cmap_i[ctmp])) { 629 #endif 630 *mat_j++ = tcol - 1; 631 //ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 632 //mat_a += bs2; 633 ilen_row++; 634 } 635 } else break; 636 } 637 imark = l; 638 for (l=0; l<nzA; l++) { 639 #if defined (PETSC_USE_CTABLE) 640 ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr); 641 if (tcol) { 642 #else 643 if ((tcol = cmap_i[cstart + cworkA[l]])) { 644 #endif 645 *mat_j++ = tcol - 1; 646 //ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 647 //mat_a += bs2; 648 ilen_row++; 649 } 650 } 651 for (l=imark; l<nzB; l++) { 652 #if defined (PETSC_USE_CTABLE) 653 ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr); 654 if (tcol) { 655 #else 656 if ((tcol = cmap_i[bmap[cworkB[l]]])) { 657 #endif 658 *mat_j++ = tcol - 1; 659 //ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 //mat_a += bs2; 661 ilen_row++; 662 } 663 } 664 imat_ilen[row] = ilen_row; 665 } 666 } 667 } 668 } 669 670 /* Now assemble the off proc rows*/ 671 { 672 PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen; 673 PetscInt *imat_j,*imat_i; 674 //MatScalar *imat_a,*rbuf4_i; 675 PetscMPIInt ii; 676 677 for (tmp2=0; tmp2<nrqs; tmp2++) { 678 //ierr = MPI_Waitany(nrqs,r_waits4,&ii,r_status4+tmp2);CHKERRQ(ierr); 679 ii = tmp2; //new 680 idex = pa[ii]; 681 sbuf1_i = sbuf1[idex]; 682 jmax = sbuf1_i[0]; 683 ct1 = 2*jmax + 1; 684 ct2 = 0; 685 rbuf2_i = rbuf2[ii]; 686 rbuf3_i = rbuf3[ii]; 687 //rbuf4_i = rbuf4[ii]; 688 for (j=1; j<=jmax; j++) { 689 is_no = sbuf1_i[2*j-1]; 690 #if defined (PETSC_USE_CTABLE) 691 lrow1_grow1 = rowmaps[is_no]; 692 lcol1_gcol1 = colmaps[is_no]; 693 #else 694 rmap_i = rmap[is_no]; 695 cmap_i = cmap[is_no]; 696 #endif 697 mat = (Mat_SeqBAIJ*)submats[is_no]->data; 698 imat_ilen = mat->ilen; 699 imat_j = mat->j; 700 imat_i = mat->i; 701 //imat_a = mat->a; 702 max1 = sbuf1_i[2*j]; 703 for (k=0; k<max1; k++,ct1++) { 704 row = sbuf1_i[ct1]; 705 #if defined (PETSC_USE_CTABLE) 706 ierr = PetscTableFind(lrow1_grow1,row+1,&row);CHKERRQ(ierr); 707 row--; 708 if(row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); } 709 #else 710 row = rmap_i[row]; 711 #endif 712 ilen = imat_ilen[row]; 713 mat_i = imat_i[row]; 714 //mat_a = imat_a + mat_i*bs2; 715 mat_j = imat_j + mat_i; 716 max2 = rbuf2_i[ct1]; 717 for (l=0; l<max2; l++,ct2++) { 718 #if defined (PETSC_USE_CTABLE) 719 ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 720 if (tcol) { 721 #else 722 if ((tcol = cmap_i[rbuf3_i[ct2]])) { 723 #endif 724 *mat_j++ = tcol - 1; 725 /* *mat_a++= rbuf4_i[ct2]; */ 726 //ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 727 //mat_a += bs2; 728 ilen++; 729 } 730 } 731 imat_ilen[row] = ilen; 732 } 733 } 734 } 735 } 736 //ierr = PetscFree(r_status4);CHKERRQ(ierr); 737 //ierr = PetscFree(r_waits4);CHKERRQ(ierr); 738 //if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} 739 //ierr = PetscFree(s_waits4);CHKERRQ(ierr); 740 //ierr = PetscFree(s_status4);CHKERRQ(ierr); 741 742 /* Restore the indices */ 743 for (i=0; i<ismax; i++) { 744 ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); 745 ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); 746 } 747 748 /* Destroy allocated memory */ 749 #if defined(PETSC_USE_CTABLE) 750 ierr = PetscFree4(irow,icol,nrow,ncol);CHKERRQ(ierr); 751 #else 752 ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr); 753 #endif 754 ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr); 755 ierr = PetscFree(pa);CHKERRQ(ierr); 756 757 ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr); 758 ierr = PetscFree(sbuf1);CHKERRQ(ierr); 759 ierr = PetscFree(rbuf2);CHKERRQ(ierr); 760 for (i=0; i<nrqr; ++i) { 761 ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); 762 } 763 for (i=0; i<nrqs; ++i) { 764 ierr = PetscFree(rbuf3[i]);CHKERRQ(ierr); 765 //ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr); 766 } 767 ierr = PetscFree3(sbuf2,req_size,req_source);CHKERRQ(ierr); 768 ierr = PetscFree(rbuf3);CHKERRQ(ierr); 769 //ierr = PetscFree(rbuf4);CHKERRQ(ierr); 770 ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); 771 ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); 772 //ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr); 773 //ierr = PetscFree(sbuf_aa);CHKERRQ(ierr); 774 775 #if defined (PETSC_USE_CTABLE) 776 for (i=0; i<ismax; i++){ 777 ierr = PetscTableDestroy(rowmaps[i]);CHKERRQ(ierr); 778 ierr = PetscTableDestroy(colmaps[i]);CHKERRQ(ierr); 779 } 780 ierr = PetscFree(colmaps);CHKERRQ(ierr); 781 ierr = PetscFree(rowmaps);CHKERRQ(ierr); 782 #else 783 ierr = PetscFree(rmap);CHKERRQ(ierr); 784 ierr = PetscFree(cmap[0]);CHKERRQ(ierr); 785 ierr = PetscFree(cmap);CHKERRQ(ierr); 786 #endif 787 ierr = PetscFree(lens);CHKERRQ(ierr); 788 789 for (i=0; i<ismax; i++) { 790 ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 791 ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 792 } 793 PetscFunctionReturn(0); 794 } 795 796 /* ------------------------------------------------------- */ 797 798 #undef __FUNCT__ 799 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ" 800 PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov) 801 { 802 PetscErrorCode ierr; 803 PetscInt i,N=C->cmap->N, bs=C->rmap->bs; 804 IS *is_new; 805 806 PetscFunctionBegin; 807 ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr); 808 /* Convert the indices into block format */ 809 ierr = ISCompressIndicesGeneral(N,bs,is_max,is,is_new);CHKERRQ(ierr); 810 if (ov < 0){ SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");} 811 812 //------- new ---------- 813 PetscBool flg=PETSC_FALSE; 814 ierr = PetscOptionsHasName(PETSC_NULL, "-IncreaseOverlap_new", &flg);CHKERRQ(ierr); 815 if (!flg){ /* previous non-scalable implementation */ 816 for (i=0; i<ov; ++i) { 817 ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr); 818 } 819 } else { /* scalable implementation using modified BAIJ routines */ 820 821 Mat *submats; 822 IS *is_row; 823 PetscInt M=C->rmap->N,Mbs=M/bs,*nidx,isz,iov; 824 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 825 PetscMPIInt rank=c->rank; 826 827 828 ierr = PetscMalloc((Mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 829 830 /* Create is_row */ 831 ierr = PetscMalloc(is_max*sizeof(IS **),&is_row);CHKERRQ(ierr); 832 for (i=0; i<is_max; i++){ 833 ierr = ISCreateStride(PETSC_COMM_SELF,Mbs,0,1,is_row+i);CHKERRQ(ierr); 834 } 835 836 for (iov=0; iov<ov; ++iov) { 837 /* Get submats for column search - replace with MatGetSubMatrices_MPIBAIJ_IJ() */ 838 // copied from MatGetSubMatrices_MPIBAIJ() ---------------- 839 /* Allocate memory to hold all the submatrices */ 840 //if (scall != MAT_REUSE_MATRIX) { 841 ierr = PetscMalloc((is_max+1)*sizeof(Mat),&submats);CHKERRQ(ierr); 842 //} 843 /* Determine the number of stages through which submatrices are done */ 844 PetscInt nmax,nstages_local,nstages,max_no,pos; 845 nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt)); 846 if (!nmax) nmax = 1; 847 nstages_local = is_max/nmax + ((is_max % nmax)?1:0); 848 849 /* Make sure every processor loops through the nstages */ 850 ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr); 851 for (i=0,pos=0; i<nstages; i++) { 852 if (pos+nmax <= is_max) max_no = nmax; 853 else if (pos == is_max) max_no = 0; 854 else max_no = is_max-pos; 855 856 //ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,submats+pos);CHKERRQ(ierr); 857 ierr = MatGetSubMatrices_MPIBAIJ_local_ijonly(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,submats+pos);CHKERRQ(ierr); 858 if (rank == 10 && !i){ 859 printf("submats[0]:\n"); 860 ierr = MatView(submats[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 861 } 862 863 pos += max_no; 864 } 865 // end of copy ----------------------------------- 866 867 /* Row search */ 868 ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr); 869 870 /* Column search */ 871 PetscBT table; 872 Mat_SeqSBAIJ *asub_i; 873 PetscInt *ai,brow,nz,nis,l; 874 const PetscInt *idx; 875 876 ierr = PetscBTCreate(Mbs,table);CHKERRQ(ierr); 877 for (i=0; i<is_max; i++){ 878 asub_i = (Mat_SeqSBAIJ*)submats[i]->data; 879 ai=asub_i->i;; 880 881 /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */ 882 ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr); 883 884 ierr = ISGetIndices(is_new[i],&idx);CHKERRQ(ierr); 885 ierr = ISGetLocalSize(is_new[i],&nis);CHKERRQ(ierr); 886 for (l=0; l<nis; l++) { 887 ierr = PetscBTSet(table,idx[l]);CHKERRQ(ierr); 888 nidx[l] = idx[l]; 889 } 890 isz = nis; 891 892 for (brow=0; brow<Mbs; brow++){ 893 nz = ai[brow+1] - ai[brow]; 894 if (nz) { 895 if (!PetscBTLookupSet(table,brow)) nidx[isz++] = brow; 896 } 897 } 898 ierr = ISRestoreIndices(is_new[i],&idx);CHKERRQ(ierr); 899 ierr = ISDestroy(is_new[i]);CHKERRQ(ierr); 900 901 /* create updated is_new */ 902 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_COPY_VALUES,is_new+i);CHKERRQ(ierr); 903 } 904 905 /* Free tmp spaces */ 906 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 907 for (i=0; i<is_max; i++){ 908 if (rank == 10 && !i){ 909 printf("submats[0]:\n"); 910 ierr = MatView(submats[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 911 } 912 ierr = MatDestroy(submats[i]);CHKERRQ(ierr); 913 } 914 ierr = PetscFree(submats);CHKERRQ(ierr); 915 } 916 917 for (i=0; i<is_max; i++){ 918 ierr = ISDestroy(is_row[i]);CHKERRQ(ierr); 919 } 920 ierr = PetscFree(is_row);CHKERRQ(ierr); 921 ierr = PetscFree(nidx);CHKERRQ(ierr); 922 } 923 //--------end of new---------- 924 925 for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);} 926 ierr = ISExpandIndicesGeneral(N,bs,is_max,is_new,is);CHKERRQ(ierr); 927 928 for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);} 929 ierr = PetscFree(is_new);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 typedef enum {MINE,OTHER} WhoseOwner; 934 /* data1, odata1 and odata2 are packed in the format (for communication): 935 data[0] = is_max, no of is 936 data[1] = size of is[0] 937 ... 938 data[is_max] = size of is[is_max-1] 939 data[is_max + 1] = data(is[0]) 940 ... 941 data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i]) 942 ... 943 data2 is packed in the format (for creating output is[]): 944 data[0] = is_max, no of is 945 data[1] = size of is[0] 946 ... 947 data[is_max] = size of is[is_max-1] 948 data[is_max + 1] = data(is[0]) 949 ... 950 data[is_max + 1 + Mbs*i) = data(is[i]) 951 ... 952 */ 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once" 955 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[]) 956 { 957 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 958 PetscErrorCode ierr; 959 PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len; 960 const PetscInt *idx_i; 961 PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i, 962 Mbs,i,j,k,*odata1,*odata2, 963 proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; 964 PetscInt proc_end=0,*iwork,len_unused,nodata2; 965 PetscInt ois_max; /* max no of is[] in each of processor */ 966 char *t_p; 967 MPI_Comm comm; 968 MPI_Request *s_waits1,*s_waits2,r_req; 969 MPI_Status *s_status,r_status; 970 PetscBT *table; /* mark indices of this processor's is[] */ 971 PetscBT table_i; 972 PetscBT otable; /* mark indices of other processors' is[] */ 973 PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; 974 IS garray_local,garray_gl; 975 976 PetscFunctionBegin; 977 comm = ((PetscObject)C)->comm; 978 size = c->size; 979 rank = c->rank; 980 Mbs = c->Mbs; 981 982 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 983 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 984 985 /* create tables used in 986 step 1: table[i] - mark c->garray of proc [i] 987 step 3: table[i] - mark indices of is[i] when whose=MINE 988 table[0] - mark incideces of is[] when whose=OTHER */ 989 len = PetscMax(is_max, size);CHKERRQ(ierr); 990 ierr = PetscMalloc2(len,PetscBT,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,char,&t_p);CHKERRQ(ierr); 991 for (i=0; i<len; i++) { 992 table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 993 } 994 995 ierr = MPI_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 996 997 /* 1. Send this processor's is[] to other processors */ 998 /*---------------------------------------------------*/ 999 /* allocate spaces */ 1000 ierr = PetscMalloc(is_max*sizeof(PetscInt),&n);CHKERRQ(ierr); 1001 len = 0; 1002 for (i=0; i<is_max; i++) { 1003 ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr); 1004 len += n[i]; 1005 } 1006 if (!len) { 1007 is_max = 0; 1008 } else { 1009 len += 1 + is_max; /* max length of data1 for one processor */ 1010 } 1011 1012 1013 ierr = PetscMalloc((size*len+1)*sizeof(PetscInt),&data1);CHKERRQ(ierr); 1014 ierr = PetscMalloc(size*sizeof(PetscInt*),&data1_start);CHKERRQ(ierr); 1015 for (i=0; i<size; i++) data1_start[i] = data1 + i*len; 1016 1017 ierr = PetscMalloc4(size,PetscInt,&len_s,size,PetscInt,&btable,size,PetscInt,&iwork,size+1,PetscInt,&Bowners);CHKERRQ(ierr); 1018 1019 /* gather c->garray from all processors */ 1020 ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local);CHKERRQ(ierr); 1021 ierr = ISAllGather(garray_local, &garray_gl);CHKERRQ(ierr); 1022 ierr = ISDestroy(garray_local);CHKERRQ(ierr); 1023 ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1024 Bowners[0] = 0; 1025 for (i=0; i<size; i++) Bowners[i+1] += Bowners[i]; 1026 1027 if (is_max){ 1028 /* hash table ctable which maps c->row to proc_id) */ 1029 ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr); 1030 for (proc_id=0,j=0; proc_id<size; proc_id++) { 1031 for (; j<C->rmap->range[proc_id+1]/bs; j++) { 1032 ctable[j] = proc_id; 1033 } 1034 } 1035 1036 /* hash tables marking c->garray */ 1037 ierr = ISGetIndices(garray_gl,&idx_i); 1038 for (i=0; i<size; i++){ 1039 table_i = table[i]; 1040 ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr); 1041 for (j = Bowners[i]; j<Bowners[i+1]; j++){ /* go through B cols of proc[i]*/ 1042 ierr = PetscBTSet(table_i,idx_i[j]);CHKERRQ(ierr); 1043 } 1044 } 1045 ierr = ISRestoreIndices(garray_gl,&idx_i);CHKERRQ(ierr); 1046 } /* if (is_max) */ 1047 ierr = ISDestroy(garray_gl);CHKERRQ(ierr); 1048 1049 /* evaluate communication - mesg to who, length, and buffer space */ 1050 for (i=0; i<size; i++) len_s[i] = 0; 1051 1052 /* header of data1 */ 1053 for (proc_id=0; proc_id<size; proc_id++){ 1054 iwork[proc_id] = 0; 1055 *data1_start[proc_id] = is_max; 1056 data1_start[proc_id]++; 1057 for (j=0; j<is_max; j++) { 1058 if (proc_id == rank){ 1059 *data1_start[proc_id] = n[j]; 1060 } else { 1061 *data1_start[proc_id] = 0; 1062 } 1063 data1_start[proc_id]++; 1064 } 1065 } 1066 1067 for (i=0; i<is_max; i++) { 1068 ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr); 1069 for (j=0; j<n[i]; j++){ 1070 idx = idx_i[j]; 1071 *data1_start[rank] = idx; data1_start[rank]++; /* for local proccessing */ 1072 proc_end = ctable[idx]; 1073 for (proc_id=0; proc_id<=proc_end; proc_id++){ /* for others to process */ 1074 if (proc_id == rank ) continue; /* done before this loop */ 1075 if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) 1076 continue; /* no need for sending idx to [proc_id] */ 1077 *data1_start[proc_id] = idx; data1_start[proc_id]++; 1078 len_s[proc_id]++; 1079 } 1080 } 1081 /* update header data */ 1082 for (proc_id=0; proc_id<size; proc_id++){ 1083 if (proc_id== rank) continue; 1084 *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id]; 1085 iwork[proc_id] = len_s[proc_id] ; 1086 } 1087 ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr); 1088 } 1089 1090 nrqs = 0; nrqr = 0; 1091 for (i=0; i<size; i++){ 1092 data1_start[i] = data1 + i*len; 1093 if (len_s[i]){ 1094 nrqs++; 1095 len_s[i] += 1 + is_max; /* add no. of header msg */ 1096 } 1097 } 1098 1099 for (i=0; i<is_max; i++) { 1100 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 1101 } 1102 ierr = PetscFree(n);CHKERRQ(ierr); 1103 ierr = PetscFree(ctable);CHKERRQ(ierr); 1104 1105 /* Determine the number of messages to expect, their lengths, from from-ids */ 1106 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr); 1107 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr); 1108 1109 /* Now post the sends */ 1110 ierr = PetscMalloc2(size,MPI_Request,&s_waits1,size,MPI_Request,&s_waits2);CHKERRQ(ierr); 1111 k = 0; 1112 for (proc_id=0; proc_id<size; proc_id++){ /* send data1 to processor [proc_id] */ 1113 if (len_s[proc_id]){ 1114 ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr); 1115 k++; 1116 } 1117 } 1118 1119 /* 2. Receive other's is[] and process. Then send back */ 1120 /*-----------------------------------------------------*/ 1121 len = 0; 1122 for (i=0; i<nrqr; i++){ 1123 if (len_r1[i] > len)len = len_r1[i]; 1124 } 1125 ierr = PetscFree(len_r1);CHKERRQ(ierr); 1126 ierr = PetscFree(id_r1);CHKERRQ(ierr); 1127 1128 for (proc_id=0; proc_id<size; proc_id++) 1129 len_s[proc_id] = iwork[proc_id] = 0; 1130 1131 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&odata1);CHKERRQ(ierr); 1132 ierr = PetscMalloc(size*sizeof(PetscInt**),&odata2_ptr);CHKERRQ(ierr); 1133 ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr); 1134 1135 len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */ 1136 len_est = 2*len_max; /* estimated space of storing is[] for all receiving messages */ 1137 ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); 1138 nodata2 = 0; /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */ 1139 odata2_ptr[nodata2] = odata2; 1140 len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max */ 1141 1142 k = 0; 1143 while (k < nrqr){ 1144 /* Receive messages */ 1145 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr); 1146 if (flag){ 1147 ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); 1148 proc_id = r_status.MPI_SOURCE; 1149 ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); 1150 ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); 1151 1152 /* Process messages */ 1153 /* make sure there is enough unused space in odata2 array */ 1154 if (len_unused < len_max){ /* allocate more space for odata2 */ 1155 ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); 1156 odata2_ptr[++nodata2] = odata2; 1157 len_unused = len_est; 1158 } 1159 1160 ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr); 1161 len = 1 + odata2[0]; 1162 for (i=0; i<odata2[0]; i++){ 1163 len += odata2[1 + i]; 1164 } 1165 1166 /* Send messages back */ 1167 ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr); 1168 k++; 1169 odata2 += len; 1170 len_unused -= len; 1171 len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */ 1172 } 1173 } 1174 ierr = PetscFree(odata1);CHKERRQ(ierr); 1175 ierr = PetscBTDestroy(otable);CHKERRQ(ierr); 1176 1177 /* 3. Do local work on this processor's is[] */ 1178 /*-------------------------------------------*/ 1179 /* make sure there is enough unused space in odata2(=data) array */ 1180 len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */ 1181 if (len_unused < len_max){ /* allocate more space for odata2 */ 1182 ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); 1183 odata2_ptr[++nodata2] = odata2; 1184 len_unused = len_est; 1185 } 1186 1187 data = odata2; 1188 ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);CHKERRQ(ierr); 1189 ierr = PetscFree(data1_start);CHKERRQ(ierr); 1190 1191 /* 4. Receive work done on other processors, then merge */ 1192 /*------------------------------------------------------*/ 1193 /* get max number of messages that this processor expects to recv */ 1194 ierr = MPI_Allreduce(len_s,iwork,size,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 1195 ierr = PetscMalloc((iwork[rank]+1)*sizeof(PetscInt),&data2);CHKERRQ(ierr); 1196 ierr = PetscFree4(len_s,btable,iwork,Bowners);CHKERRQ(ierr); 1197 1198 k = 0; 1199 while (k < nrqs){ 1200 /* Receive messages */ 1201 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); 1202 if (flag){ 1203 ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); 1204 proc_id = r_status.MPI_SOURCE; 1205 ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); 1206 ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); 1207 if (len > 1+is_max){ /* Add data2 into data */ 1208 data2_i = data2 + 1 + is_max; 1209 for (i=0; i<is_max; i++){ 1210 table_i = table[i]; 1211 data_i = data + 1 + is_max + Mbs*i; 1212 isz = data[1+i]; 1213 for (j=0; j<data2[1+i]; j++){ 1214 col = data2_i[j]; 1215 if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;} 1216 } 1217 data[1+i] = isz; 1218 if (i < is_max - 1) data2_i += data2[1+i]; 1219 } 1220 } 1221 k++; 1222 } 1223 } 1224 ierr = PetscFree(data2);CHKERRQ(ierr); 1225 ierr = PetscFree2(table,t_p);CHKERRQ(ierr); 1226 1227 /* phase 1 sends are complete */ 1228 ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr); 1229 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);} 1230 ierr = PetscFree(data1);CHKERRQ(ierr); 1231 1232 /* phase 2 sends are complete */ 1233 if (nrqr){ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);} 1234 ierr = PetscFree2(s_waits1,s_waits2);CHKERRQ(ierr); 1235 ierr = PetscFree(s_status);CHKERRQ(ierr); 1236 1237 /* 5. Create new is[] */ 1238 /*--------------------*/ 1239 for (i=0; i<is_max; i++) { 1240 data_i = data + 1 + is_max + Mbs*i; 1241 ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr); 1242 } 1243 for (k=0; k<=nodata2; k++){ 1244 ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr); 1245 } 1246 ierr = PetscFree(odata2_ptr);CHKERRQ(ierr); 1247 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local" 1253 /* 1254 MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap, to do 1255 the work on the local processor. 1256 1257 Inputs: 1258 C - MAT_MPISBAIJ; 1259 data - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format. 1260 whose - whose is[] to be processed, 1261 MINE: this processor's is[] 1262 OTHER: other processor's is[] 1263 Output: 1264 nidx - whose = MINE: 1265 holds input and newly found indices in the same format as data 1266 whose = OTHER: 1267 only holds the newly found indices 1268 table - table[i]: mark the indices of is[i], i=0,...,is_max. Used only in the case 'whose=MINE'. 1269 */ 1270 /* Would computation be reduced by swapping the loop 'for each is' and 'for each row'? */ 1271 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C,PetscInt *data,PetscInt whose,PetscInt *nidx,PetscBT *table) 1272 { 1273 Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; 1274 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data; 1275 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(c->B)->data; 1276 PetscErrorCode ierr; 1277 PetscInt row,mbs,Mbs,*nidx_i,col,col_max,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l; 1278 PetscInt a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n; 1279 PetscBT table0; /* mark the indices of input is[] for look up */ 1280 PetscBT table_i; /* poits to i-th table. When whose=OTHER, a single table is used for all is[] */ 1281 1282 PetscFunctionBegin; 1283 Mbs = c->Mbs; mbs = a->mbs; 1284 ai = a->i; aj = a->j; 1285 bi = b->i; bj = b->j; 1286 garray = c->garray; 1287 rstart = c->rstartbs; 1288 is_max = data[0]; 1289 1290 ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr); 1291 1292 nidx[0] = is_max; 1293 idx_i = data + is_max + 1; /* ptr to input is[0] array */ 1294 nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */ 1295 for (i=0; i<is_max; i++) { /* for each is */ 1296 isz = 0; 1297 n = data[1+i]; /* size of input is[i] */ 1298 1299 /* initialize and set table_i(mark idx and nidx) and table0(only mark idx) */ 1300 if (whose == MINE){ /* process this processor's is[] */ 1301 table_i = table[i]; 1302 nidx_i = nidx + 1+ is_max + Mbs*i; 1303 } else { /* process other processor's is[] - only use one temp table */ 1304 table_i = table[0]; 1305 } 1306 ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr); 1307 ierr = PetscBTMemzero(Mbs,table0);CHKERRQ(ierr); 1308 if (n==0) { 1309 nidx[1+i] = 0; /* size of new is[i] */ 1310 continue; 1311 } 1312 1313 isz0 = 0; col_max = 0; 1314 for (j=0; j<n; j++){ 1315 col = idx_i[j]; 1316 if (col >= Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index col %D >= Mbs %D",col,Mbs); 1317 if(!PetscBTLookupSet(table_i,col)) { 1318 ierr = PetscBTSet(table0,col);CHKERRQ(ierr); 1319 if (whose == MINE) {nidx_i[isz0] = col;} 1320 if (col_max < col) col_max = col; 1321 isz0++; 1322 } 1323 } 1324 1325 if (whose == MINE) {isz = isz0;} 1326 k = 0; /* no. of indices from input is[i] that have been examined */ 1327 for (row=0; row<mbs; row++){ 1328 a_start = ai[row]; a_end = ai[row+1]; 1329 b_start = bi[row]; b_end = bi[row+1]; 1330 if (PetscBTLookup(table0,row+rstart)){ /* row is on input is[i]: 1331 do row search: collect all col in this row */ 1332 for (l = a_start; l<a_end ; l++){ /* Amat */ 1333 col = aj[l] + rstart; 1334 if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;} 1335 } 1336 for (l = b_start; l<b_end ; l++){ /* Bmat */ 1337 col = garray[bj[l]]; 1338 if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;} 1339 } 1340 k++; 1341 if (k >= isz0) break; /* for (row=0; row<mbs; row++) */ 1342 } else { /* row is not on input is[i]: 1343 do col serach: add row onto nidx_i if there is a col in nidx_i */ 1344 for (l = a_start; l<a_end ; l++){ /* Amat */ 1345 col = aj[l] + rstart; 1346 if (col > col_max) break; 1347 if (PetscBTLookup(table0,col)){ 1348 if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} 1349 break; /* for l = start; l<end ; l++) */ 1350 } 1351 } 1352 for (l = b_start; l<b_end ; l++){ /* Bmat */ 1353 col = garray[bj[l]]; 1354 if (col > col_max) break; 1355 if (PetscBTLookup(table0,col)){ 1356 if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} 1357 break; /* for l = start; l<end ; l++) */ 1358 } 1359 } 1360 } 1361 } 1362 1363 if (i < is_max - 1){ 1364 idx_i += n; /* ptr to input is[i+1] array */ 1365 nidx_i += isz; /* ptr to output is[i+1] array */ 1366 } 1367 nidx[1+i] = isz; /* size of new is[i] */ 1368 } /* for each is */ 1369 ierr = PetscBTDestroy(table0);CHKERRQ(ierr); 1370 1371 PetscFunctionReturn(0); 1372 } 1373 1374 1375