/* Routines to compute overlapping regions of a parallel MPI matrix. Used for finding submatrices that were shared across processors. */ #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> #include static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat,PetscInt*,PetscInt,PetscInt*,PetscBT*); PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov) { PetscInt i,N=C->cmap->N, bs=C->rmap->bs,M=C->rmap->N,Mbs=M/bs,*nidx,isz,iov; IS *is_new,*is_row; Mat *submats; Mat_MPISBAIJ *c=(Mat_MPISBAIJ*)C->data; Mat_SeqSBAIJ *asub_i; PetscBT table; PetscInt *ai,brow,nz,nis,l,nmax,nstages_local,nstages,max_no,pos; const PetscInt *idx; PetscBool flg; PetscFunctionBegin; PetscCall(PetscMalloc1(is_max,&is_new)); /* Convert the indices into block format */ PetscCall(ISCompressIndicesGeneral(N,C->rmap->n,bs,is_max,is,is_new)); PetscCheckFalse(ov < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); /* ----- previous non-scalable implementation ----- */ flg = PETSC_FALSE; PetscCall(PetscOptionsHasName(NULL,NULL, "-IncreaseOverlap_old", &flg)); if (flg) { /* previous non-scalable implementation */ printf("use previous non-scalable implementation...\n"); for (i=0; iNbs * sizeof(PetscInt)); if (!nmax) nmax = 1; nstages_local = is_max/nmax + ((is_max % nmax) ? 1 : 0); /* Make sure every processor loops through the nstages */ PetscCall(MPIU_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C))); for (iov=0; iovijonly = PETSC_TRUE; /* only matrix data structures are requested */ /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to trigger that */ PetscCall(PetscStrcpy(((PetscObject)c->A)->type_name,MATSEQBAIJ)); PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,submats+pos)); PetscCall(PetscStrcpy(((PetscObject)c->A)->type_name,MATSEQSBAIJ)); pos += max_no; } /* 2) Row search */ PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C,is_max,is_new)); /* 3) Column search */ for (i=0; idata; ai = asub_i->i; /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */ PetscCall(PetscBTMemzero(Mbs,table)); PetscCall(ISGetIndices(is_new[i],&idx)); PetscCall(ISGetLocalSize(is_new[i],&nis)); for (l=0; ldata; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i; PetscInt Mbs,i,j,k,*odata1,*odata2; PetscInt proc_id,**odata2_ptr,*ctable=NULL,*btable,len_max,len_est; PetscInt proc_end=0,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); size = c->size; rank = c->rank; Mbs = c->Mbs; PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag1)); PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag2)); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size); PetscCall(PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p)); for (i=0; igarray from all processors */ PetscCall(ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local)); PetscCall(ISAllGather(garray_local, &garray_gl)); PetscCall(ISDestroy(&garray_local)); PetscCallMPI(MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm)); Bowners[0] = 0; for (i=0; irow to proc_id) */ PetscCall(PetscMalloc1(Mbs,&ctable)); for (proc_id=0,j=0; proc_idrmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id; } /* hash tables marking c->garray */ PetscCall(ISGetIndices(garray_gl,&idx_i)); for (i=0; i len) len = len_r1[i]; } PetscCall(PetscFree(len_r1)); PetscCall(PetscFree(id_r1)); for (proc_id=0; proc_id= len_max */ k = 0; while (k < nrqr) { /* Receive messages */ PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status)); if (flag) { PetscCallMPI(MPI_Get_count(&r_status,MPIU_INT,&len)); proc_id = r_status.MPI_SOURCE; PetscCallMPI(MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req)); PetscCallMPI(MPI_Wait(&r_req,&r_status)); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max) { /* allocate more space for odata2 */ PetscCall(PetscMalloc1(len_est+1,&odata2)); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable)); len = 1 + odata2[0]; for (i=0; i 1+is_max) { /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; idata; Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data; Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(c->B)->data; PetscInt row,mbs,Mbs,*nidx_i,col,col_max,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l; PetscInt a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n; PetscBT table0; /* mark the indices of input is[] for look up */ PetscBT table_i; /* poits to i-th table. When whose=OTHER, a single table is used for all is[] */ PetscFunctionBegin; Mbs = c->Mbs; mbs = a->mbs; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; rstart = c->rstartbs; is_max = data[0]; PetscCall(PetscBTCreate(Mbs,&table0)); nidx[0] = is_max; idx_i = data + is_max + 1; /* ptr to input is[0] array */ nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */ for (i=0; i= Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT,col,Mbs); if (!PetscBTLookupSet(table_i,col)) { PetscCall(PetscBTSet(table0,col)); if (whose == MINE) nidx_i[isz0] = col; if (col_max < col) col_max = col; isz0++; } } if (whose == MINE) isz = isz0; k = 0; /* no. of indices from input is[i] that have been examined */ for (row=0; row= isz0) break; /* for (row=0; row col_max) break; if (PetscBTLookup(table0,col)) { if (!PetscBTLookupSet(table_i,row+rstart)) nidx_i[isz++] = row+rstart; break; /* for l = start; l col_max) break; if (PetscBTLookup(table0,col)) { if (!PetscBTLookupSet(table_i,row+rstart)) nidx_i[isz++] = row+rstart; break; /* for l = start; l