/*$Id: baijov.c,v 1.53 2000/09/18 14:20:35 bsmith Exp bsmith $*/
/*
  Routines to compute overlapping regions of a parallel MPI matrix
  and to find submatrices that are shared across processors.
*/
#include "src/mat/impls/baij/mpi/mpibaij.h"
#include "petscbt.h"
static int MatIncreaseOverlap_MPIBAIJ_Once(Mat,int,IS *);
static int MatIncreaseOverlap_MPIBAIJ_Local(Mat,int,char **,int*,int**);
static int MatIncreaseOverlap_MPIBAIJ_Receive(Mat,int,int **,int**,int*);
static int MatGetSubMatrices_MPIBAIJ_local(Mat,int,IS*,IS*,MatReuse,Mat*);
EXTERN int MatGetRow_MPIBAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_MPIBAIJ(Mat,int,int*,int**,Scalar**);
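/* Typical usage from application code (a sketch only; `A` is a distributed
   BAIJ matrix and `nlocal`/`first` are hypothetical sizes).
   MatIncreaseOverlap() is the public entry point that dispatches to
   MatIncreaseOverlap_MPIBAIJ() below, growing each index set by `ov`
   levels of overlap: */
#if 0
  {
    IS  is;
    int ierr;
    ierr = ISCreateStride(PETSC_COMM_SELF,nlocal,first,1,&is);CHKERRQ(ierr);
    ierr = MatIncreaseOverlap(A,1,&is,2);CHKERRQ(ierr); /* two levels of overlap */
    ierr = ISDestroy(is);CHKERRQ(ierr);
  }
#endif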
#undef __FUNC__
#define __FUNC__ /**/"MatCompressIndicesGeneral_MPIBAIJ"
static int MatCompressIndicesGeneral_MPIBAIJ(Mat C,int imax,IS *is_in,IS *is_out)
{
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)C->data;
int ierr,isz,bs = baij->bs,n,i,j,*idx,ival;
#if defined (PETSC_USE_CTABLE)
PetscTable gid1_lid1;
int tt, gid1, *nidx;
PetscTablePosition tpos;
#else
int Nbs,*nidx;
PetscBT table;
#endif
PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr);
#else
Nbs = baij->Nbs;
nidx = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(nidx);
ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr);
#endif
  for (i=0; i<imax; i++) {
    isz  = 0;
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
#else
    ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr);
#endif
    ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
    for (j=0; j<n; j++) {
      ival = idx[j]/bs; /* convert the point indices into block indices */
#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr);
      if (!tt) { /* a new block index: give it the next local id */
        ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1);CHKERRQ(ierr);
        isz++;
      }
#else
      if (ival>Nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"index greater than mat-dim");
if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
#endif
}
ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
nidx = (int*)PetscMalloc((isz+1)*sizeof(int));CHKPTRQ(nidx);
ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
j = 0;
while (tpos) {
ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr);
if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"index greater than array-dim"); }
nidx[tt] = gid1 - 1;
j++;
}
    if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"table error: j != isz"); }
ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
ierr = PetscFree(nidx);CHKERRQ(ierr);
#else
ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
#endif
}
#if defined (PETSC_USE_CTABLE)
ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
#else
ierr = PetscBTDestroy(table);CHKERRQ(ierr);
ierr = PetscFree(nidx);CHKERRQ(ierr);
#endif
PetscFunctionReturn(0);
}
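/* A minimal usage sketch (illustration only, not part of the library): with
   block size bs = 2, the point indices {0,1,4,5} compress to the block
   indices {0,2}.  The driver below, and its name, are hypothetical. */
#if 0
static int MatCompressIndicesGeneralExample(Mat C)
{
  int idx[4] = {0,1,4,5},ierr;
  IS  is_in,is_out;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,4,idx,&is_in);CHKERRQ(ierr);
  ierr = MatCompressIndicesGeneral_MPIBAIJ(C,1,&is_in,&is_out);CHKERRQ(ierr);
  /* is_out now holds the block indices {0,2} */
  ierr = ISDestroy(is_in);CHKERRQ(ierr);
  ierr = ISDestroy(is_out);CHKERRQ(ierr);
  return 0;
}
#endif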
#undef __FUNC__
#define __FUNC__ /**/"MatCompressIndicesSorted_MPIBAIJ"
static int MatCompressIndicesSorted_MPIBAIJ(Mat C,int imax,IS *is_in,IS *is_out)
{
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)C->data;
int ierr,bs=baij->bs,i,j,k,val,n,*idx,*nidx,*idx_local;
PetscTruth flg;
#if defined (PETSC_USE_CTABLE)
int maxsz;
#else
int Nbs=baij->Nbs;
#endif
PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* determine the largest index-set size, to bound the work array */
  maxsz = 0;
  for (i=0; i<imax; i++) {
    ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
    if (n%bs != 0) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Indices are not block ordered");
    n = n/bs;
    if (n > maxsz) maxsz = n;
  }
  nidx = (int*)PetscMalloc((maxsz+1)*sizeof(int));CHKPTRQ(nidx);
#else
nidx = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(nidx);
#endif
/* Now check if the indices are in block order */
for (i=0; i*/"MatExpandIndices_MPIBAIJ"
static int MatExpandIndices_MPIBAIJ(Mat C,int imax,IS *is_in,IS *is_out)
{
Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)C->data;
int ierr,bs = baij->bs,n,i,j,k,*idx,*nidx;
#if defined (PETSC_USE_CTABLE)
int maxsz;
#else
int Nbs = baij->Nbs;
#endif
PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
/* Now check max size */
for (i=0,maxsz=0; i<imax; i++) {
  ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
  if (n*bs > maxsz) maxsz = n*bs;
}
nidx = (int*)PetscMalloc((maxsz+1)*sizeof(int));CHKPTRQ(nidx);
#else
nidx = (int*)PetscMalloc((Nbs*bs+1)*sizeof(int));CHKPTRQ(nidx);
#endif
  for (i=0; i<imax; i++) {
    ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
    for (j=0; j<n; j++) {
      for (k=0; k<bs; k++) nidx[j*bs+k] = idx[j]*bs+k; /* expand each block index into bs point indices */
    }
    ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,n*bs,nidx,(is_out+i));CHKERRQ(ierr);
  }
  ierr = PetscFree(nidx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ /**/"MatIncreaseOverlap_MPIBAIJ"
int MatIncreaseOverlap_MPIBAIJ(Mat C,int imax,IS *is,int ov)
{
int i,ierr;
IS *is_new;
PetscFunctionBegin;
is_new = (IS *)PetscMalloc(imax*sizeof(IS));CHKPTRQ(is_new);
/* Convert the indices into block format */
ierr = MatCompressIndicesGeneral_MPIBAIJ(C,imax,is,is_new);CHKERRQ(ierr);
if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative overlap specified\n");}
  for (i=0; i<ov; ++i) {
    ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr);
  }
  /* Expand the block indices back into point format */
  for (i=0; i<imax; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
  ierr = MatExpandIndices_MPIBAIJ(C,imax,is_new,is);CHKERRQ(ierr);
  for (i=0; i<imax; i++) { ierr = ISDestroy(is_new[i]);CHKERRQ(ierr); }
  ierr = PetscFree(is_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Sample message format:
  If a processor A wants processor B to process some elements corresponding
  to index sets is[1] and is[5]:
  mesg [0] = 2   (number of index sets in the message)
  -----------
  mesg [1] = 1 => is[1]
mesg [2] = sizeof(is[1]);
-----------
  mesg [3] = 5 => is[5]
  mesg [4] = sizeof(is[5]);
  -----------
  mesg [5]
  mesg [n]  data(is[1])
  -----------
  mesg [n+1]
  mesg [m]  data(is[5])
-----------
  Notes:
  nrqs - number of requests sent (or to be sent out)
  nrqr - number of requests received (which have to be, or have already been, processed)
*/
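/* Illustration only (a hypothetical helper, not part of the library):
   packing a request header in the format above for two index sets is[1]
   (length n1) and is[5] (length n5). */
#if 0
static void PackRequestExample(int *mesg,int n1,const int *data1,int n5,const int *data5)
{
  int k,ct = 0;
  mesg[ct++] = 2;                             /* number of index sets */
  mesg[ct++] = 1;  mesg[ct++] = n1;           /* id and size of is[1] */
  mesg[ct++] = 5;  mesg[ct++] = n5;           /* id and size of is[5] */
  for (k=0; k<n1; k++) mesg[ct++] = data1[k]; /* data of is[1] */
  for (k=0; k<n5; k++) mesg[ct++] = data5[k]; /* data of is[5] */
}
#endif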
#undef __FUNC__
#define __FUNC__ /**/"MatIncreaseOverlap_MPIBAIJ_Once"
static int MatIncreaseOverlap_MPIBAIJ_Once(Mat C,int imax,IS *is)
{
Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
int **idx,*n,*w1,*w2,*w3,*w4,*rtable,**data,len,*idx_i;
int size,rank,Mbs,i,j,k,ierr,**rbuf,row,proc,nrqs,msz,**outdat,**ptr;
int *ctr,*pa,tag,*tmp,bsz,nrqr,*isz,*isz1,**xdata,bsz1,**rbuf2;
PetscBT *table;
MPI_Comm comm;
MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
MPI_Status *s_status,*recv_status;
PetscFunctionBegin;
comm = C->comm;
tag = C->tag;
size = c->size;
rank = c->rank;
Mbs = c->Mbs;
len = (imax+1)*sizeof(int*)+ (imax + Mbs)*sizeof(int);
idx = (int**)PetscMalloc(len);CHKPTRQ(idx);
n = (int*)(idx + imax);
rtable = n + imax;
  for (i=0; i<imax; i++) {
    ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    len = c->rowners[i+1];
    for (; j<len; j++) {
      rtable[j] = i;
    }
  }

  /* [Omitted: evaluate the communication pattern (w1-w4, nrqs, msz, pa);
     exchange the message counts; post the receives (rbuf) and the sends of
     the packed index sets (outdat, s_waits1); grow the local sets with
     MatIncreaseOverlap_MPIBAIJ_Local(); answer the incoming requests with
     MatIncreaseOverlap_MPIBAIJ_Receive() and send the replies back (xdata,
     s_waits2); finally receive the replies (rbuf2) and merge the new
     indices into each is[i], recreating the index sets.] */

  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ /**/"MatIncreaseOverlap_MPIBAIJ_Local"
/*
   MatIncreaseOverlap_MPIBAIJ_Local - Called by MatIncreaseOverlap_MPIBAIJ(),
   to do the work on the local processor.

   Inputs:
    C     - MAT_MPIBAIJ;
    imax  - total number of index sets processed at a time;
    table - an array of char - size = Mbs bits.

   Output:
    isz   - array containing the count of the solution elements
            corresponding to each index set;
    data  - pointer to the solutions
*/
static int MatIncreaseOverlap_MPIBAIJ_Local(Mat C,int imax,PetscBT *table,int *isz,int **data)
{
Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
Mat A = c->A,B = c->B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
int start,end,val,max,rstart,cstart,*ai,*aj;
int *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
PetscBT table_i;
PetscFunctionBegin;
rstart = c->rstart;
cstart = c->cstart;
ai = a->i;
aj = a->j;
bi = b->i;
bj = b->j;
garray = c->garray;
  for (i=0; i<imax; i++) {
    data_i  = data[i];
    table_i = table[i];
    isz_i   = isz[i];
    for (j=0,max=isz[i]; j<max; j++) {
      row   = data_i[j] - rstart;
      start = ai[row];
      end   = ai[row+1];
      for (k=start; k<end; k++) { /* Amat: local block columns, shifted to global */
        val = aj[k] + cstart;
        if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val; }
      }
      start = bi[row];
      end   = bi[row+1];
      for (k=start; k<end; k++) { /* Bmat: map through garray to global block columns */
        val = garray[bj[k]];
        if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val; }
      }
    }
    isz[i] = isz_i;
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ /**/"MatIncreaseOverlap_MPIBAIJ_Receive"
/*
  MatIncreaseOverlap_MPIBAIJ_Receive - Process the received messages,
  and return the output

  Input:
   C    - the matrix
   nrqr - number of messages being processed.
   rbuf - an array of pointers to the received requests

  Output:
   xdata - array of messages to be sent back
   isz1  - size of each message

  For better efficiency perhaps we should malloc each xdata[i] separately;
  then if a realloc is required we need only copy the data for that one row
  rather than for all previous rows, as happens now when a single large chunk of
memory is used.
*/
static int MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,int nrqr,int **rbuf,int **xdata,int * isz1)
{
Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
Mat A = c->A,B = c->B;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
int rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
int row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
int val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr;
int *rbuf_i,kmax,rbuf_0,ierr;
PetscBT xtable;
PetscFunctionBegin;
rank = c->rank;
Mbs = c->Mbs;
rstart = c->rstart;
cstart = c->cstart;
ai = a->i;
aj = a->j;
bi = b->i;
bj = b->j;
garray = c->garray;
  for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
    rbuf_i  = rbuf[i];
    rbuf_0  = rbuf_i[0];
    ct     += rbuf_0;
    for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
  }

  if (c->Mbs) max1 = ct*(a->nz + b->nz)/c->Mbs;
else max1 = 1;
mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
xdata[0] = (int*)PetscMalloc(mem_estimate*sizeof(int));CHKPTRQ(xdata[0]);
++no_malloc;
ierr = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr);
ierr = PetscMemzero(isz1,nrqr*sizeof(int));CHKERRQ(ierr);
ct3 = 0;
  for (i=0; i<nrqr; i++) { /* for each message from a requesting processor */
    rbuf_i = rbuf[i];
    rbuf_0 = rbuf_i[0];
    ct1    = 2*rbuf_0 + 1;
    ct2    = ct1;
    for (j=1; j<=rbuf_0; j++) { /* for each index set in the message */
      ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr);
      oct2 = ct2;
      kmax = rbuf_i[2*j];
      /* [Omitted: for every requested row, gather its A-part (shifted by
         cstart) and B-part (mapped through garray) block columns through
         xtable, growing xdata[0] by a malloc/copy/free cycle (counted in
         no_malloc) whenever mem_estimate is exceeded, and record the count
         ct2-oct2 in the reply header.] */
    }
    xdata[i][0] = rbuf_0;
    if (i+1 < nrqr) xdata[i+1] = xdata[i] + ct2;
    isz1[i] = ct2; /* size of each message */
  }
  ierr = PetscBTDestroy(xtable);CHKERRQ(ierr);
  PLogInfo(0,"MatIncreaseOverlap_MPIBAIJ:[%d] Allocated %d bytes, required %d, no of mallocs = %d\n",rank,mem_estimate,ct3,no_malloc);
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ /**/"MatGetSubMatrices_MPIBAIJ"
int MatGetSubMatrices_MPIBAIJ(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat **submat)
{
IS *isrow_new,*iscol_new;
Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
int nmax,nstages_local,nstages,i,pos,max_no,ierr;
PetscFunctionBegin;
  /* The compression and expansion should be avoided: it does not point out
     errors and it might change the indices, hence it is buggy */
isrow_new = (IS *)PetscMalloc(2*(ismax+1)*sizeof(IS));CHKPTRQ(isrow_new);
iscol_new = isrow_new + ismax;
ierr = MatCompressIndicesSorted_MPIBAIJ(C,ismax,isrow,isrow_new);CHKERRQ(ierr);
ierr = MatCompressIndicesSorted_MPIBAIJ(C,ismax,iscol,iscol_new);CHKERRQ(ierr);
/* Allocate memory to hold all the submatrices */
if (scall != MAT_REUSE_MATRIX) {
*submat = (Mat *)PetscMalloc((ismax+1)*sizeof(Mat));CHKPTRQ(*submat);
}
/* Determine the number of stages through which submatrices are done */
nmax = 20*1000000 / (c->Nbs * sizeof(int));
if (!nmax) nmax = 1;
nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
  /* Make sure every processor loops through the nstages */
ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr);
  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,isrow_new+pos,iscol_new+pos,scall,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }

  for (i=0; i<ismax; i++) {
    ierr = ISDestroy(isrow_new[i]);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_new[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(isrow_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined (PETSC_USE_CTABLE)
#undef __FUNC__
#define __FUNC__ "PetscGetProc"
/* Given a global block row gid and the ownership ranges proc_gnode[]
   (processor p owns block rows proc_gnode[p] up to proc_gnode[p+1]-1),
   find the owning processor */
static int PetscGetProc(const int gid,const int numprocs,const int proc_gnode[],int *proc)
{
  int nGlobalNd = proc_gnode[numprocs];
  int fproc     = (int)((float)gid*(float)numprocs/(float)nGlobalNd + 0.5); /* initial guess; the loop below corrects it */

  PetscFunctionBegin;
  if (fproc > numprocs) fproc = numprocs;
while (gid < proc_gnode[fproc] || gid >= proc_gnode[fproc+1]) {
if (gid < proc_gnode[fproc]) fproc--;
else fproc++;
}
/* if(fproc<0 || fproc>=numprocs) { SETERRQ(1,1,"fproc < 0 || fproc >= numprocs"); }*/
*proc = fproc;
PetscFunctionReturn(0);
}
#endif
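/* Worked example (illustration only): with numprocs = 3 and
   proc_gnode = {0,4,7,10}, processor 0 owns block rows 0-3, processor 1
   owns 4-6 and processor 2 owns 7-9, so the lookup below sets p to 1. */
#if 0
  {
    int p,proc_gnode[4] = {0,4,7,10};
    PetscGetProc(5,3,proc_gnode,&p); /* p == 1 */
  }
#endif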
/* -------------------------------------------------------------------------*/
#undef __FUNC__
#define __FUNC__ /**/"MatGetSubMatrices_MPIBAIJ_local"
static int MatGetSubMatrices_MPIBAIJ_local(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat *submats)
{
Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
Mat A = c->A;
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat;
int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,start,end,size;
int **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,ierr,**rbuf1,row,proc;
int nrqs,msz,**ptr,index,*req_size,*ctr,*pa,*tmp,tcol,bsz,nrqr;
int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
int **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
int bs=c->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB;
int cstart = c->cstart,nzA,nzB,*a_i=a->i,*b_i=b->i,imark;
int *bmap = c->garray,ctmp,rstart=c->rstart,tag0,tag1,tag2,tag3;
MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
MPI_Request *r_waits4,*s_waits3,*s_waits4;
MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
MPI_Status *r_status3,*r_status4,*s_status4;
MPI_Comm comm;
MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB;
MatScalar *a_a=a->a,*b_a=b->a;
PetscTruth flag;
int *rw1;
#if defined (PETSC_USE_CTABLE)
int tt;
PetscTable *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1;
#else
int **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs;
#endif
PetscFunctionBegin;
comm = C->comm;
tag0 = C->tag;
size = c->size;
rank = c->rank;
/* Get some new tags to keep the communication clean */
ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr);
/* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(iscol[i],&flag);CHKERRQ(ierr);
    if (!flag) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IScol is not sorted");
  }

  len    = (2*ismax+1)*(sizeof(int*)+sizeof(int));
#if !defined (PETSC_USE_CTABLE)
  len   += (Mbs+1)*sizeof(int);
#endif
  irow   = (int **)PetscMalloc(len);CHKPTRQ(irow);
  icol   = irow + ismax;
  nrow   = (int*)(icol + ismax);
  ncol   = nrow + ismax;
#if !defined (PETSC_USE_CTABLE)
  rtable = ncol + ismax;
#endif

  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

#if !defined (PETSC_USE_CTABLE)
  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    jmax = c->rowners[i+1];
    for (; j<jmax; j++) {
      rtable[j] = i;
    }
  }
#endif

  /* evaluate communication - mesg to who, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them */
  w1   = (int *)PetscMalloc(size*4*sizeof(int));CHKPTRQ(w1); /* mesg size */
  w2   = w1 + size;   /* if w2[i] marked, then a message to proc i */
  w3   = w2 + size;   /* no of IS that needs to be sent to proc i */
  w4   = w3 + size;   /* temp work space used in determining w1, w2, w3 */
  ierr = PetscMemzero(w1,size*3*sizeof(int));CHKERRQ(ierr); /* initialize work vector */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(int));CHKERRQ(ierr); /* initialize work vector */
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row = irow_i[j];
#if defined (PETSC_USE_CTABLE)
      ierr = PetscGetProc(row,size,c->rowners,&proc);CHKERRQ(ierr);
#else
proc = rtable[row];
#endif
w4[proc]++;
}
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[j] += w4[j]; w3[j]++; }
    }
  }

  /* [Omitted: compute nrqs, the destination array pa[] and the total
     message size msz; exchange the message counts with the other
     processors; post the receives for rbuf1 and allocate/initialize
     sbuf1, ptr and ctr for packing the outgoing messages.] */

  /* Form the outgoing messages */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(int));CHKERRQ(ierr);
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row = irow_i[j];
#if defined (PETSC_USE_CTABLE)
      ierr = PetscGetProc(row,size,c->rowners,&proc);CHKERRQ(ierr);
#else
proc = rtable[row];
#endif
if (proc != rank) { /* copy to the outgoing buf*/
ctr[proc]++;
*ptr[proc] = row;
ptr[proc]++;
}
}
/* Update the headers for the current IS */
    for (j=0; j<size; j++) { /* Can optimize this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* [Omitted: post the sends of the request messages (sbuf1, s_waits1),
     wait for the incoming requests in rbuf1, and allocate sbuf2, req_size
     and req_source for the row-length replies.] */

  /* Send back the lengths of the requested rows */
  {
    Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data;
int *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i;
    for (i=0; i<nrqr; ++i) {
      /* [Omitted: for each row requested in rbuf1[i], compute its number of
         nonzero blocks from sAi and sBi, store the counts in sbuf2_i, post
         the reply send (s_waits2), and remember the source in req_source.] */
    }
  }

  /* [Omitted: wait for the row-length replies (rbuf2) and post the receives
     for the column indices (rbuf3) and the values (rbuf4).] */

  /* Allocate buffers for a->j, and send them off */
sbuf_aj = (int **)PetscMalloc((nrqr+1)*sizeof(int *));CHKPTRQ(sbuf_aj);
  for (i=0,j=0; i<nrqr; i++) j += req_size[i];
  sbuf_aj[0] = (int*)PetscMalloc((j+1)*sizeof(int));CHKPTRQ(sbuf_aj[0]);
  for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
  /* [Omitted: pack the block column indices of every requested row into
     sbuf_aj (A-part shifted by cstart, B-part mapped through bmap) and post
     the sends (s_waits3).] */

  /* Allocate buffers for a->a, and send them off */
sbuf_aa = (MatScalar **)PetscMalloc((nrqr+1)*sizeof(MatScalar *));CHKPTRQ(sbuf_aa);
  for (i=0,j=0; i<nrqr; i++) j += req_size[i];
  sbuf_aa[0] = (MatScalar*)PetscMalloc((j*bs2+1)*sizeof(MatScalar));CHKPTRQ(sbuf_aa[0]);
  for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2;
  /* [Omitted: pack the bs2-sized value blocks of every requested row into
     sbuf_aa and post the sends (s_waits4); free the work arrays that are no
     longer needed.] */

  /* Form the matrix */
#if !defined (PETSC_USE_CTABLE)
  /* Create col map: global block col of C -> local block col of the submatrices */
  len     = (1+ismax)*sizeof(int*)+ ismax*c->Nbs*sizeof(int);
cmap = (int **)PetscMalloc(len);CHKPTRQ(cmap);
cmap[0] = (int *)(cmap + ismax);
ierr = PetscMemzero(cmap[0],(1+ismax*c->Nbs)*sizeof(int));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + c->Nbs; }
#endif
  /* [Omitted: fill the column maps from icol[] (or build the colmaps tables
     when PETSC_USE_CTABLE is defined) and allocate lens[], the per-submatrix
     arrays of row lengths.] */

  /* Determine the number of nonzero blocks in each row of the submatrices */
  for (i=0; i<ismax; i++) {
    jmax   = nrow[i];
    irow_i = irow[i];
    lens_i = lens[i];
    for (j=0; j<jmax; j++) {
      row = irow_i[j];
#if defined (PETSC_USE_CTABLE)
      ierr = PetscGetProc(row,size,c->rowners,&proc);CHKERRQ(ierr);
#else
proc = rtable[row];
#endif
if (proc == rank) {
/* Get indices from matA and then from matB */
row = row - rstart;
nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row];
cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];
#if defined (PETSC_USE_CTABLE)
        /* [Omitted: count, through the lcol1_gcol1 table, how many of this
           row's A- and B-part block columns lie in the submatrix and add
           them to lens_i[j].] */
#else
        /* [Omitted: the same count done through the cmap[i] array.] */
#endif
      }
    }
  }

  /* [Omitted: receive the row lengths of the off-processor rows into
     lens[], and build the row maps (rowmaps / rmap) from global row to
     submatrix row.] */

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    /* Check for the correct sizes */
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqBAIJ*)(submats[i]->data);
      if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i]) || (mat->bs != bs)) {
SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong size");
}
ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(int),&flag);CHKERRQ(ierr);
if (flag == PETSC_FALSE) {
SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Cannot reuse matrix. wrong no of nonzeros");
}
      /* Initialize the matrix as if empty */
ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(int));CHKERRQ(ierr);
submats[i]->factor = C->factor;
}
} else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,c->bs,nrow[i]*bs,ncol[i]*bs,0,lens[i],submats+i);CHKERRQ(ierr);
}
}
/* Assemble the matrices */
/* First assemble the local rows */
{
int ilen_row,*imat_ilen,*imat_j,*imat_i;
MatScalar *imat_a;
    for (i=0; i<ismax; i++) {
      mat       = (Mat_SeqBAIJ*)submats[i]->data;
imat_ilen = mat->ilen;
imat_j = mat->j;
imat_i = mat->i;
imat_a = mat->a;
#if defined (PETSC_USE_CTABLE)
lcol1_gcol1 = colmaps[i];
lrow1_grow1 = rowmaps[i];
#else
cmap_i = cmap[i];
rmap_i = rmap[i];
#endif
irow_i = irow[i];
jmax = nrow[i];
      for (j=0; j<jmax; j++) {
        row = irow_i[j];
#if defined (PETSC_USE_CTABLE)
        ierr = PetscGetProc(row,size,c->rowners,&proc);CHKERRQ(ierr);
#else
proc = rtable[row];
#endif
if (proc == rank) {
row = row - rstart;
nzA = a_i[row+1] - a_i[row];
nzB = b_i[row+1] - b_i[row];
cworkA = a_j + a_i[row];
cworkB = b_j + b_i[row];
vworkA = a_a + a_i[row]*bs2;
vworkB = b_a + b_i[row]*bs2;
#if defined (PETSC_USE_CTABLE)
ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr);
row--;
if (row < 0) { SETERRQ(1,1,"row not found in table"); }
#else
row = rmap_i[row + rstart];
#endif
mat_i = imat_i[row];
mat_a = imat_a + mat_i*bs2;
mat_j = imat_j + mat_i;
ilen_row = imat_ilen[row];
/* load the column indices for this row into cols*/
          /* [Omitted: merge this row's A-part (shifted by cstart) and
             B-part (mapped through bmap) block column indices into mat_j,
             copy the matching bs2-sized value blocks from vworkA/vworkB
             into mat_a, and update imat_ilen[row].] */
        }
      }
    }
  }

  /* Now assemble the off-processor rows */
  {
    int       ilen_row,*imat_ilen,*imat_j,*imat_i,*sbuf1_i;
    MatScalar *imat_a;
    for (tmp2=0; tmp2<nrqs; tmp2++) {
      ierr    = MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);CHKERRQ(ierr);
      index   = pa[i];
      sbuf1_i = sbuf1[index];
      jmax    = sbuf1_i[0];
      ct1     = 2*jmax + 1;
      ct2     = 0;
      for (j=1; j<=jmax; j++) {
        is_no = sbuf1_i[2*j-1];
        mat   = (Mat_SeqBAIJ*)submats[is_no]->data;
imat_ilen = mat->ilen;
imat_j = mat->j;
imat_i = mat->i;
imat_a = mat->a;
max1 = sbuf1_i[2*j];
        for (k=0; k<max1; k++,ct1++) {