#ifndef lint static char vcid[] = "$Id: baijov.c,v 1.3 1996/07/04 13:01:01 bsmith Exp balay $"; #endif /* Routines to compute overlapping regions of a parallel MPI matrix and to find submatrices that were shared across processors. */ #include "mpibaij.h" #include "src/inline/bitarray.h" static int MatIncreaseOverlap_MPIBAIJ_Once(Mat, int, IS *); static int MatIncreaseOverlap_MPIBAIJ_Local(Mat , int , char **,int*, int**); static int MatIncreaseOverlap_MPIBAIJ_Receive(Mat , int, int **, int**, int* ); extern int MatGetRow_MPIBAIJ(Mat,int,int*,int**,Scalar**); extern int MatRestoreRow_MPIBAIJ(Mat,int,int*,int**,Scalar**); static int MatCompressIndices_MPIBAIJ(Mat C, int imax, IS *is) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) C->data; int ierr,isz,bs = baij->bs,Nbs,n,i,j,*idx,*nidx,ival; char *table; Nbs = baij->Nbs; table = (char *) PetscMalloc((Nbs/BITSPERBYTE +1)*sizeof(char)); CHKPTRQ(table); nidx = (int *) PetscMalloc((Nbs+1)*sizeof(int)); CHKPTRQ(nidx); for ( i=0; iNbs) SETERRQ(1,"MatIncreaseOverlap_SeqBAIJ: index greater than mat-dim"); if(!BT_LOOKUP(table, ival)) { nidx[isz++] = ival;} } ierr = ISRestoreIndices(is[i],&idx); CHKERRQ(ierr); ierr = ISDestroy(is[i]); CHKERRQ(ierr); ierr = ISCreateSeq(MPI_COMM_SELF, isz, nidx, (is+i)); CHKERRQ(ierr); } PetscFree(table); PetscFree(nidx); return 0; } static int MatExpandIndices_MPIBAIJ(Mat C, int imax, IS *is) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) C->data; int ierr,bs = baij->bs,Nbs,n,i,j,k,*idx,*nidx; Nbs = baij->Nbs; nidx = (int *) PetscMalloc((Nbs*bs+1)*sizeof(int)); CHKPTRQ(nidx); for ( i=0; i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [5] = 5 => is[5] mesg [6] = sizeof(is[5]); ----------- mesg [7] mesg [n] datas[1] ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests recieved (which have to be or which have been processed */ static int MatIncreaseOverlap_MPIBAIJ_Once(Mat C, int imax, IS *is) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ *) C->data; int **idx, *n, *w1, *w2, *w3, *w4, *rtable,**data,len,*idx_i; int size,rank,Mbs,i,j,k,ierr,**rbuf,row,proc,nrqs,msz,**outdat,**ptr; int *ctr,*pa,tag,*tmp,bsz,nrqr,*isz,*isz1,**xdata,bsz1,**rbuf2; char **table; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *s_status,*recv_status; comm = C->comm; tag = C->tag; size = c->size; rank = c->rank; Mbs = c->Mbs; len = (imax+1)*sizeof(int *) + (imax + Mbs)*sizeof(int); idx = (int **) PetscMalloc(len); CHKPTRQ(idx); n = (int *) (idx + imax); rtable = n + imax; for ( i=0; i proc*/ for ( i=0,j=0; irowners[i+1]; for ( ; jdata; Mat A = c->A, B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; int start, end, val, max, rstart,cstart,*ai, *aj; int *bi, *bj, *garray, i, j, k, row,*data_i,isz_i; char *table_i; rstart = c->rstart; cstart = c->cstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for ( i=0; idata; Mat A = c->A, B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; int rstart,cstart,*ai, *aj, *bi, *bj, *garray, i, j, k; int row,total_sz,ct, ct1, ct2, ct3,mem_estimate, oct2, l, start, end; int val, max1, max2, rank, Mbs, no_malloc =0, *tmp, new_estimate, ctr; int *rbuf_i,kmax,rbuf_0; char *xtable; rank = c->rank; Mbs = c->Mbs; rstart = c->rstart; cstart = c->cstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for ( i=0,ct=0,total_sz=0; inz +b->nz)/c->Mbs; mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); xdata[0] = (int *)PetscMalloc(mem_estimate*sizeof(int)); CHKPTRQ(xdata[0]); ++no_malloc; xtable = (char *)PetscMalloc((Mbs/BITSPERBYTE+1)*sizeof(char)); CHKPTRQ(xtable); PetscMemzero(isz1,nrqr*sizeof(int)); ct3 = 0; for ( i=0; idata; Mat A = c->A,*submats = *submat; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data, *b = (Mat_SeqBAIJ*)c->B->data, *mat; int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size; int **sbuf1,**sbuf2, rank, Mbs,i,j,k,l,ct1,ct2,ierr, **rbuf1,row,proc; int nrqs, msz, **ptr,index,*req_size,*ctr,*pa,tag,*tmp,tcol,bsz,nrqr; int **rbuf3,*req_source,**sbuf_aj, **rbuf2, max1,max2,**rmap; int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i; int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i; int *rmap_i,bs=c->bs,bs2=c->bs2; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; Scalar **rbuf4, **sbuf_aa, *vals, *mat_a, *sbuf_aa_i; comm = C->comm; tag = C->tag; size = c->size; rank = c->rank; Mbs = c->Mbs; /* Check if the col indices are sorted */ for ( i=0; i proc*/ for ( i=0,j=0; irowners[i+1]; for ( ; jA->data, *sB = (Mat_SeqBAIJ*) c->B->data; int *sAi = sA->i, *sBi = sB->i, id, rstart = c->rstart; int *sbuf2_i; for ( i=0; ij, and send them off */ sbuf_aj = (int **)PetscMalloc((nrqr+1)*sizeof(int *));CHKPTRQ(sbuf_aj); for ( i=0,j=0; ii, *b_i = b->i, imark; int *cworkA, *cworkB, cstart = c->cstart, *bmap = c->garray; int *a_j = a->j, *b_j = b->j,ctmp; for ( i=0; ia, and send them off */ sbuf_aa = (Scalar **)PetscMalloc((nrqr+1)*sizeof(Scalar *));CHKPTRQ(sbuf_aa); for ( i=0,j=0; ii, *b_i = b->i, imark; int *cworkA, *cworkB, cstart = c->cstart, *bmap = c->garray; int *a_j = a->j, *b_j = b->j; Scalar *vworkA, *vworkB, *a_a = a->a, *b_a = b->a; for ( i=0; iNbs*sizeof(int); cmap = (int **)PetscMalloc(len); CHKPTRQ(cmap); cmap[0] = (int *)(cmap + ismax); PetscMemzero(cmap[0],(1+ismax*c->Nbs)*sizeof(int)); for ( i=1; iNbs; } for ( i=0; iMbs*sizeof(int); rmap = (int **)PetscMalloc(len); CHKPTRQ(rmap); rmap[0] = (int *)(rmap + ismax); PetscMemzero(rmap[0],ismax*c->Mbs*sizeof(int)); for ( i=1; iMbs;} for ( i=0; idata); if ((mat->m != nrow[i]) || (mat->n != ncol[i])) { SETERRQ(1,"MatGetSubmatrices_MPIBAIJ:Cannot reuse matrix. wrong size"); } if (PetscMemcmp(mat->ilen,lens[i], mat->m *sizeof(int))) { SETERRQ(1,"MatGetSubmatrices_MPIBAIJ:Cannot reuse matrix. wrong no of nonzeros"); } /* Initial matrix as if empty */ PetscMemzero(mat->ilen,mat->m*sizeof(int)); } } else { *submat = submats = (Mat *)PetscMalloc(ismax*sizeof(Mat)); CHKPTRQ(submats); for ( i=0; ibs,nrow[i]*bs,ncol[i]*bs,0,lens[i],submats+i);CHKERRQ(ierr); } } /* Assemble the matrices */ /* First assemble the local rows */ { int ilen_row,*imat_ilen, *imat_j, *imat_i,old_row; Scalar *imat_a; for ( i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for ( j=0; jdata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; max1 = sbuf1_i[2*j]; for ( k=0; k