/* Routines to compute overlapping regions of a parallel MPI matrix. Used for finding submatrices that were shared across processors. */ #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> #include static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat,PetscInt*,PetscInt,PetscInt*,PetscBT*); /* -------------------------------------------------------------------------*/ /* This code is modified from MatGetSubMatrices_MPIBAIJ_local() */ #if defined (PETSC_USE_CTABLE) #undef __FUNCT__ #define __FUNCT__ "PetscGetProc_ijonly" PetscErrorCode PetscGetProc_ijonly(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank) { PetscInt nGlobalNd = proc_gnode[size]; PetscMPIInt fproc = PetscMPIIntCast( (PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5))); PetscFunctionBegin; if (fproc > size) fproc = size; while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) { if (row < proc_gnode[fproc]) fproc--; else fproc++; } *rank = fproc; PetscFunctionReturn(0); } #endif #undef __FUNCT__ #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local_ijonly" PetscErrorCode MatGetSubMatrices_MPIBAIJ_local_ijonly(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; Mat A = c->A; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat; const PetscInt **irow,**icol,*irow_i; PetscInt *nrow,*ncol,*w3,*w4,start; PetscErrorCode ierr; PetscMPIInt size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc; PetscInt **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row; PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol; PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; PetscInt *a_j=a->j,*b_j=b->j,*cworkA,*cworkB; //bs=C->rmap->bs,bs2=c->bs2 PetscInt cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark; PetscInt *bmap = c->garray,ctmp,rstart=c->rstartbs; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3,*s_waits3; //MPI_Request *r_waits4,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2,*r_status3; //MPI_Status *r_status4,*s_status4; MPI_Comm comm; //MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB; //MatScalar *a_a=a->a,*b_a=b->a; PetscBool flag; PetscMPIInt *onodes1,*olengths1; #if defined (PETSC_USE_CTABLE) PetscInt tt; PetscTable *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1; #else PetscInt **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs; #endif PetscFunctionBegin; comm = ((PetscObject)C)->comm; tag0 = ((PetscObject)C)->tag; size = c->size; rank = c->rank; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); #if defined(PETSC_USE_CTABLE) ierr = PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);CHKERRQ(ierr); #else ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,Mbs+1,PetscInt,&rtable);CHKERRQ(ierr); /* Create hash table for the mapping :row -> proc*/ for (i=0,j=0; irowners[i+1]; for (; jrangebs,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif w4[proc]++; } for (j=0; jrangebs,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc != rank) { /* copy to the outgoing buf*/ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; jA->data,*sB = (Mat_SeqBAIJ*)c->B->data; PetscInt *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; for (i=0; ij, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; ia, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar *),&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; iNbs*sizeof(PetscInt),&cmap[0]);CHKERRQ(ierr); for (i=1; iNbs; } #endif for (i=0; irangebs,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc == rank) { /* Get indices from matA and then from matB */ row = row - rstart; nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; #if defined (PETSC_USE_CTABLE) for (k=0; kdata); //if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros"); /* Initial matrix as if empty */ ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr); submats[i]->factortype = C->factortype; } } else { for (i=0; itype_name);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); //ierr = MatSeqBAIJSetPreallocation(submats[i],1,0,lens[i]);CHKERRQ(ierr); ierr = MatSeqSBAIJSetPreallocation(submats[i],1,0,lens[i]);CHKERRQ(ierr); //ierr = MatSeqSBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by MatGetSubMatrices_MPISBAIJ()*/ } } /* Assemble the matrices */ /* First assemble the local rows */ { PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i; //MatScalar *imat_a; for (i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; //imat_a = mat->a; #if defined (PETSC_USE_CTABLE) lcol1_gcol1 = colmaps[i]; lrow1_grow1 = rowmaps[i]; #else cmap_i = cmap[i]; rmap_i = rmap[i]; #endif irow_i = irow[i]; jmax = nrow[i]; for (j=0; jrangebs,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc == rank) { row = row - rstart; nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; //vworkA = a_a + a_i[row]*bs2; //vworkB = b_a + b_i[row]*bs2; #if defined (PETSC_USE_CTABLE) ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr); row--; if (row < 0) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table"); } #else row = rmap_i[row + rstart]; #endif mat_i = imat_i[row]; //mat_a = imat_a + mat_i*bs2; mat_j = imat_j + mat_i; ilen_row = imat_ilen[row]; /* load the column indices for this row into cols*/ for (l=0; ldata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; //imat_a = mat->a; max1 = sbuf1_i[2*j]; for (k=0; kcmap->N, bs=C->rmap->bs; IS *is_new; PetscFunctionBegin; ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr); /* Convert the indices into block format */ ierr = ISCompressIndicesGeneral(N,bs,is_max,is,is_new);CHKERRQ(ierr); if (ov < 0){ SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");} //------- new ---------- PetscBool flg=PETSC_FALSE; ierr = PetscOptionsHasName(PETSC_NULL, "-IncreaseOverlap_new", &flg);CHKERRQ(ierr); if (!flg){ /* previous non-scalable implementation */ for (i=0; irmap->N,Mbs=M/bs,*nidx,isz,iov; Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; PetscMPIInt rank=c->rank; ierr = PetscMalloc((Mbs+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); /* Create is_row */ ierr = PetscMalloc(is_max*sizeof(IS **),&is_row);CHKERRQ(ierr); for (i=0; iNbs * sizeof(PetscInt)); if (!nmax) nmax = 1; nstages_local = is_max/nmax + ((is_max % nmax)?1:0); /* Make sure every processor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr); for (i=0,pos=0; idata; ai=asub_i->i;; /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */ ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr); ierr = ISGetIndices(is_new[i],&idx);CHKERRQ(ierr); ierr = ISGetLocalSize(is_new[i],&nis);CHKERRQ(ierr); for (l=0; ldata; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i, Mbs,i,j,k,*odata1,*odata2, proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; PetscInt proc_end=0,*iwork,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; comm = ((PetscObject)C)->comm; size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size);CHKERRQ(ierr); ierr = PetscMalloc2(len,PetscBT,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,char,&t_p);CHKERRQ(ierr); for (i=0; igarray from all processors */ ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local);CHKERRQ(ierr); ierr = ISAllGather(garray_local, &garray_gl);CHKERRQ(ierr); ierr = ISDestroy(garray_local);CHKERRQ(ierr); ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); Bowners[0] = 0; for (i=0; irow to proc_id) */ ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr); for (proc_id=0,j=0; proc_idrmap->range[proc_id+1]/bs; j++) { ctable[j] = proc_id; } } /* hash tables marking c->garray */ ierr = ISGetIndices(garray_gl,&idx_i); for (i=0; i len)len = len_r1[i]; } ierr = PetscFree(len_r1);CHKERRQ(ierr); ierr = PetscFree(id_r1);CHKERRQ(ierr); for (proc_id=0; proc_id= len_max */ k = 0; while (k < nrqr){ /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr); if (flag){ ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max){ /* allocate more space for odata2 */ ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr); len = 1 + odata2[0]; for (i=0; i 1+is_max){ /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; idata; Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data; Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(c->B)->data; PetscErrorCode ierr; PetscInt row,mbs,Mbs,*nidx_i,col,col_max,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l; PetscInt a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n; PetscBT table0; /* mark the indices of input is[] for look up */ PetscBT table_i; /* poits to i-th table. When whose=OTHER, a single table is used for all is[] */ PetscFunctionBegin; Mbs = c->Mbs; mbs = a->mbs; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; rstart = c->rstartbs; is_max = data[0]; ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr); nidx[0] = is_max; idx_i = data + is_max + 1; /* ptr to input is[0] array */ nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */ for (i=0; i= Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index col %D >= Mbs %D",col,Mbs); if(!PetscBTLookupSet(table_i,col)) { ierr = PetscBTSet(table0,col);CHKERRQ(ierr); if (whose == MINE) {nidx_i[isz0] = col;} if (col_max < col) col_max = col; isz0++; } } if (whose == MINE) {isz = isz0;} k = 0; /* no. of indices from input is[i] that have been examined */ for (row=0; row= isz0) break; /* for (row=0; row col_max) break; if (PetscBTLookup(table0,col)){ if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} break; /* for l = start; l col_max) break; if (PetscBTLookup(table0,col)){ if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;} break; /* for l = start; l