179bdfe76SSatish Balay #ifndef lint 2*93bc47c4SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.14 1996/07/12 17:50:29 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 15*93bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *); 16*93bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *); 17*93bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double); 18*93bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec); 19*93bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 20*93bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec); 21*93bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec); 22*93bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *); 2357b952d6SSatish Balay 2457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 2557b952d6SSatish Balay number to the local number in the off-diagonal part of the local 2657b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 2757b952d6SSatish Balay length of colmap equals the global matrix length. 2857b952d6SSatish Balay */ 2957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 3057b952d6SSatish Balay { 3157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 3357b952d6SSatish Balay int nbs = B->nbs,i; 3457b952d6SSatish Balay 3557b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 3657b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 3757b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 3857b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 3957b952d6SSatish Balay return 0; 4057b952d6SSatish Balay } 4157b952d6SSatish Balay 4257b952d6SSatish Balay 43acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 4457b952d6SSatish Balay { 4557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4657b952d6SSatish Balay int ierr; 4757b952d6SSatish Balay if (baij->size == 1) { 4857b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 4957b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 5057b952d6SSatish Balay return 0; 5157b952d6SSatish Balay } 5257b952d6SSatish Balay 5357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 5457b952d6SSatish Balay { 5557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 5657b952d6SSatish Balay Scalar value; 5757b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 5857b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 5957b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 6057b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 6157b952d6SSatish Balay 6257b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 6357b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 6457b952d6SSatish Balay } 6557b952d6SSatish Balay baij->insertmode = addv; 6657b952d6SSatish Balay rstart_orig = rstart*bs; 6757b952d6SSatish Balay rend_orig = rend*bs; 6857b952d6SSatish Balay cstart_orig = cstart*bs; 6957b952d6SSatish Balay cend_orig = cend*bs; 7057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 7157b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 7257b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 7357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 7457b952d6SSatish Balay row = im[i] - rstart_orig; 7557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 7657b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 7757b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 7857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 7957b952d6SSatish Balay col = in[j] - cstart_orig; 8057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8157b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8257b952d6SSatish Balay } 8357b952d6SSatish Balay else { 8457b952d6SSatish Balay if (mat->was_assembled) { 8557b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 8658667388SSatish Balay col = baij->colmap[in[j]/bs] +in[j]%bs; 8757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 8857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 8957b952d6SSatish Balay col = in[j]; 9057b952d6SSatish Balay } 9157b952d6SSatish Balay } 9257b952d6SSatish Balay else col = in[j]; 9357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 9457b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 9557b952d6SSatish Balay } 9657b952d6SSatish Balay } 9757b952d6SSatish Balay } 9857b952d6SSatish Balay else { 9957b952d6SSatish Balay if (roworiented) { 10057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 10157b952d6SSatish Balay } 10257b952d6SSatish Balay else { 10357b952d6SSatish Balay row = im[i]; 10457b952d6SSatish Balay for ( j=0; j<n; j++ ) { 10557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 10657b952d6SSatish Balay } 10757b952d6SSatish Balay } 10857b952d6SSatish Balay } 10957b952d6SSatish Balay } 11057b952d6SSatish Balay return 0; 11157b952d6SSatish Balay } 11257b952d6SSatish Balay 113d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 114d6de1c52SSatish Balay { 115d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 116d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 117d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 118d6de1c52SSatish Balay 119d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 120d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 121d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 122d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 123d6de1c52SSatish Balay row = idxm[i] - bsrstart; 124d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 125d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 126d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 127d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 128d6de1c52SSatish Balay col = idxn[j] - bscstart; 129d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 130d6de1c52SSatish Balay } 131d6de1c52SSatish Balay else { 132acdf5bf4SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 133d9d09a02SSatish Balay if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0; 134d9d09a02SSatish Balay else { 135d9d09a02SSatish Balay col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs; 136d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 137d6de1c52SSatish Balay } 138d6de1c52SSatish Balay } 139d6de1c52SSatish Balay } 140d9d09a02SSatish Balay } 141d6de1c52SSatish Balay else { 142d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 143d6de1c52SSatish Balay } 144d6de1c52SSatish Balay } 145d6de1c52SSatish Balay return 0; 146d6de1c52SSatish Balay } 147d6de1c52SSatish Balay 148d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 149d6de1c52SSatish Balay { 150d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 151d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 152acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 153d6de1c52SSatish Balay double sum = 0.0; 154d6de1c52SSatish Balay Scalar *v; 155d6de1c52SSatish Balay 156d6de1c52SSatish Balay if (baij->size == 1) { 157d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 158d6de1c52SSatish Balay } else { 159d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 160d6de1c52SSatish Balay v = amat->a; 161d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 162d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 163d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 164d6de1c52SSatish Balay #else 165d6de1c52SSatish Balay sum += (*v)*(*v); v++; 166d6de1c52SSatish Balay #endif 167d6de1c52SSatish Balay } 168d6de1c52SSatish Balay v = bmat->a; 169d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 170d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 171d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 172d6de1c52SSatish Balay #else 173d6de1c52SSatish Balay sum += (*v)*(*v); v++; 174d6de1c52SSatish Balay #endif 175d6de1c52SSatish Balay } 176d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 177d6de1c52SSatish Balay *norm = sqrt(*norm); 178d6de1c52SSatish Balay } 179acdf5bf4SSatish Balay else 180acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 181d6de1c52SSatish Balay } 182d6de1c52SSatish Balay return 0; 183d6de1c52SSatish Balay } 18457b952d6SSatish Balay 18557b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 18657b952d6SSatish Balay { 18757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 18857b952d6SSatish Balay MPI_Comm comm = mat->comm; 18957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 19057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 19157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 19257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 19357b952d6SSatish Balay InsertMode addv; 19457b952d6SSatish Balay Scalar *rvalues,*svalues; 19557b952d6SSatish Balay 19657b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 19757b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 19857b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 19957b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 20057b952d6SSatish Balay } 20157b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 20257b952d6SSatish Balay 20357b952d6SSatish Balay /* first count number of contributors to each processor */ 20457b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 20557b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 20657b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 20757b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 20857b952d6SSatish Balay idx = baij->stash.idx[i]; 20957b952d6SSatish Balay for ( j=0; j<size; j++ ) { 21057b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 21157b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 21257b952d6SSatish Balay } 21357b952d6SSatish Balay } 21457b952d6SSatish Balay } 21557b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 21657b952d6SSatish Balay 21757b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 21857b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 21957b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 22057b952d6SSatish Balay nreceives = work[rank]; 22157b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 22257b952d6SSatish Balay nmax = work[rank]; 22357b952d6SSatish Balay PetscFree(work); 22457b952d6SSatish Balay 22557b952d6SSatish Balay /* post receives: 22657b952d6SSatish Balay 1) each message will consist of ordered pairs 22757b952d6SSatish Balay (global index,value) we store the global index as a double 22857b952d6SSatish Balay to simplify the message passing. 22957b952d6SSatish Balay 2) since we don't know how long each individual message is we 23057b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 23157b952d6SSatish Balay this is a lot of wasted space. 23257b952d6SSatish Balay 23357b952d6SSatish Balay 23457b952d6SSatish Balay This could be done better. 23557b952d6SSatish Balay */ 23657b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 23757b952d6SSatish Balay CHKPTRQ(rvalues); 23857b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 23957b952d6SSatish Balay CHKPTRQ(recv_waits); 24057b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 24157b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 24257b952d6SSatish Balay comm,recv_waits+i); 24357b952d6SSatish Balay } 24457b952d6SSatish Balay 24557b952d6SSatish Balay /* do sends: 24657b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 24757b952d6SSatish Balay the ith processor 24857b952d6SSatish Balay */ 24957b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 25057b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 25157b952d6SSatish Balay CHKPTRQ(send_waits); 25257b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 25357b952d6SSatish Balay starts[0] = 0; 25457b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 25557b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 25657b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 25757b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 25857b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 25957b952d6SSatish Balay } 26057b952d6SSatish Balay PetscFree(owner); 26157b952d6SSatish Balay starts[0] = 0; 26257b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 26357b952d6SSatish Balay count = 0; 26457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 26557b952d6SSatish Balay if (procs[i]) { 26657b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 26757b952d6SSatish Balay comm,send_waits+count++); 26857b952d6SSatish Balay } 26957b952d6SSatish Balay } 27057b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 27157b952d6SSatish Balay 27257b952d6SSatish Balay /* Free cache space */ 27357b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 27457b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 27557b952d6SSatish Balay 27657b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 27757b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 27857b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 27957b952d6SSatish Balay baij->rmax = nmax; 28057b952d6SSatish Balay 28157b952d6SSatish Balay return 0; 28257b952d6SSatish Balay } 28357b952d6SSatish Balay 28457b952d6SSatish Balay 28557b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 28657b952d6SSatish Balay { 28757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 28857b952d6SSatish Balay MPI_Status *send_status,recv_status; 28957b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 29057b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 29157b952d6SSatish Balay Scalar *values,val; 29257b952d6SSatish Balay InsertMode addv = baij->insertmode; 29357b952d6SSatish Balay 29457b952d6SSatish Balay /* wait on receives */ 29557b952d6SSatish Balay while (count) { 29657b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 29757b952d6SSatish Balay /* unpack receives into our local space */ 29857b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 29957b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 30057b952d6SSatish Balay n = n/3; 30157b952d6SSatish Balay for ( i=0; i<n; i++ ) { 30257b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 30357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 30457b952d6SSatish Balay val = values[3*i+2]; 30557b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 30657b952d6SSatish Balay col -= baij->cstart*bs; 30757b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 30857b952d6SSatish Balay } 30957b952d6SSatish Balay else { 31057b952d6SSatish Balay if (mat->was_assembled) { 31157b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 31257b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 31357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 31457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 31557b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 31657b952d6SSatish Balay } 31757b952d6SSatish Balay } 31857b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 31957b952d6SSatish Balay } 32057b952d6SSatish Balay } 32157b952d6SSatish Balay count--; 32257b952d6SSatish Balay } 32357b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 32457b952d6SSatish Balay 32557b952d6SSatish Balay /* wait on sends */ 32657b952d6SSatish Balay if (baij->nsends) { 32757b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 32857b952d6SSatish Balay CHKPTRQ(send_status); 32957b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 33057b952d6SSatish Balay PetscFree(send_status); 33157b952d6SSatish Balay } 33257b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 33357b952d6SSatish Balay 33457b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 33557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 33657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 33757b952d6SSatish Balay 33857b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 33957b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 34057b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 34157b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 34257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 34357b952d6SSatish Balay } 34457b952d6SSatish Balay 3456d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 34657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 34757b952d6SSatish Balay } 34857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 34957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 35057b952d6SSatish Balay 35157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 35257b952d6SSatish Balay return 0; 35357b952d6SSatish Balay } 35457b952d6SSatish Balay 35557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 35657b952d6SSatish Balay { 35757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 35857b952d6SSatish Balay int ierr; 35957b952d6SSatish Balay 36057b952d6SSatish Balay if (baij->size == 1) { 36157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 36257b952d6SSatish Balay } 36357b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 36457b952d6SSatish Balay return 0; 36557b952d6SSatish Balay } 36657b952d6SSatish Balay 36757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 36857b952d6SSatish Balay { 36957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 370cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 37157b952d6SSatish Balay FILE *fd; 37257b952d6SSatish Balay ViewerType vtype; 37357b952d6SSatish Balay 37457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 37557b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 37657b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 37757b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 37857b952d6SSatish Balay int nz, nzalloc, mem; 37957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 38057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 38157b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 38257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 38357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 38457b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 38557b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 38657b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 38757b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 38857b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 38957b952d6SSatish Balay fflush(fd); 39057b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 39157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 39257b952d6SSatish Balay return 0; 39357b952d6SSatish Balay } 39457b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 39557b952d6SSatish Balay return 0; 39657b952d6SSatish Balay } 39757b952d6SSatish Balay } 39857b952d6SSatish Balay 39957b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 40057b952d6SSatish Balay Draw draw; 40157b952d6SSatish Balay PetscTruth isnull; 40257b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 40357b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 40457b952d6SSatish Balay } 40557b952d6SSatish Balay 40657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 40757b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 40857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 40957b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 41057b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 41157b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 41257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 41357b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 41457b952d6SSatish Balay fflush(fd); 41557b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 41657b952d6SSatish Balay } 41757b952d6SSatish Balay else { 41857b952d6SSatish Balay int size = baij->size; 41957b952d6SSatish Balay rank = baij->rank; 42057b952d6SSatish Balay if (size == 1) { 42157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 42257b952d6SSatish Balay } 42357b952d6SSatish Balay else { 42457b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 42557b952d6SSatish Balay Mat A; 42657b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 42757b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 42857b952d6SSatish Balay int mbs=baij->mbs; 42957b952d6SSatish Balay Scalar *a; 43057b952d6SSatish Balay 43157b952d6SSatish Balay if (!rank) { 432cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 43357b952d6SSatish Balay CHKERRQ(ierr); 43457b952d6SSatish Balay } 43557b952d6SSatish Balay else { 436cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 43757b952d6SSatish Balay CHKERRQ(ierr); 43857b952d6SSatish Balay } 43957b952d6SSatish Balay PLogObjectParent(mat,A); 44057b952d6SSatish Balay 44157b952d6SSatish Balay /* copy over the A part */ 44257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 44357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 44457b952d6SSatish Balay row = baij->rstart; 44557b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 44657b952d6SSatish Balay 44757b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 44857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 44957b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 45057b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 45157b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 45257b952d6SSatish Balay for (k=0; k<bs; k++ ) { 453cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 454cee3aa6bSSatish Balay col++; a += bs; 45557b952d6SSatish Balay } 45657b952d6SSatish Balay } 45757b952d6SSatish Balay } 45857b952d6SSatish Balay /* copy over the B part */ 45957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 46057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 46157b952d6SSatish Balay row = baij->rstart*bs; 46257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 46357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 46457b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 46557b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 46657b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 46757b952d6SSatish Balay for (k=0; k<bs; k++ ) { 468cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 469cee3aa6bSSatish Balay col++; a += bs; 47057b952d6SSatish Balay } 47157b952d6SSatish Balay } 47257b952d6SSatish Balay } 47357b952d6SSatish Balay PetscFree(rvals); 4746d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 4756d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 47657b952d6SSatish Balay if (!rank) { 47757b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 47857b952d6SSatish Balay } 47957b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 48057b952d6SSatish Balay } 48157b952d6SSatish Balay } 48257b952d6SSatish Balay return 0; 48357b952d6SSatish Balay } 48457b952d6SSatish Balay 48557b952d6SSatish Balay 48657b952d6SSatish Balay 48757b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 48857b952d6SSatish Balay { 48957b952d6SSatish Balay Mat mat = (Mat) obj; 49057b952d6SSatish Balay int ierr; 49157b952d6SSatish Balay ViewerType vtype; 49257b952d6SSatish Balay 49357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 49457b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 49557b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 49657b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 49757b952d6SSatish Balay } 49857b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 49957b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 50057b952d6SSatish Balay } 50157b952d6SSatish Balay return 0; 50257b952d6SSatish Balay } 50357b952d6SSatish Balay 50479bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 50579bdfe76SSatish Balay { 50679bdfe76SSatish Balay Mat mat = (Mat) obj; 50779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 50879bdfe76SSatish Balay int ierr; 50979bdfe76SSatish Balay 51079bdfe76SSatish Balay #if defined(PETSC_LOG) 51179bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 51279bdfe76SSatish Balay #endif 51379bdfe76SSatish Balay 51479bdfe76SSatish Balay PetscFree(baij->rowners); 51579bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 51679bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 51779bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 51879bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 51979bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 52079bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 52179bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 52279bdfe76SSatish Balay PetscFree(baij); 52379bdfe76SSatish Balay PLogObjectDestroy(mat); 52479bdfe76SSatish Balay PetscHeaderDestroy(mat); 52579bdfe76SSatish Balay return 0; 52679bdfe76SSatish Balay } 52779bdfe76SSatish Balay 528cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 529cee3aa6bSSatish Balay { 530cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 53147b4a8eaSLois Curfman McInnes int ierr, nt; 532cee3aa6bSSatish Balay 53347b4a8eaSLois Curfman McInnes ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 53447b4a8eaSLois Curfman McInnes if (nt != a->n) { 53547b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 53647b4a8eaSLois Curfman McInnes } 53747b4a8eaSLois Curfman McInnes ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 53847b4a8eaSLois Curfman McInnes if (nt != a->m) { 53947b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 54047b4a8eaSLois Curfman McInnes } 541cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 542cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 543cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 544cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 545cee3aa6bSSatish Balay return 0; 546cee3aa6bSSatish Balay } 547cee3aa6bSSatish Balay 548cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 549cee3aa6bSSatish Balay { 550cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 551cee3aa6bSSatish Balay int ierr; 552cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 553cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 554cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 555cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 556cee3aa6bSSatish Balay return 0; 557cee3aa6bSSatish Balay } 558cee3aa6bSSatish Balay 559cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 560cee3aa6bSSatish Balay { 561cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 562cee3aa6bSSatish Balay int ierr; 563cee3aa6bSSatish Balay 564cee3aa6bSSatish Balay /* do nondiagonal part */ 565cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 566cee3aa6bSSatish Balay /* send it on its way */ 567cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 568cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 569cee3aa6bSSatish Balay /* do local part */ 570cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 571cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 572cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 573cee3aa6bSSatish Balay /* but is not perhaps always true. */ 574cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 575cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 576cee3aa6bSSatish Balay return 0; 577cee3aa6bSSatish Balay } 578cee3aa6bSSatish Balay 579cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 580cee3aa6bSSatish Balay { 581cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 582cee3aa6bSSatish Balay int ierr; 583cee3aa6bSSatish Balay 584cee3aa6bSSatish Balay /* do nondiagonal part */ 585cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 586cee3aa6bSSatish Balay /* send it on its way */ 587cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 588cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 589cee3aa6bSSatish Balay /* do local part */ 590cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 591cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 592cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 593cee3aa6bSSatish Balay /* but is not perhaps always true. */ 594cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 595cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 596cee3aa6bSSatish Balay return 0; 597cee3aa6bSSatish Balay } 598cee3aa6bSSatish Balay 599cee3aa6bSSatish Balay /* 600cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 601cee3aa6bSSatish Balay diagonal block 602cee3aa6bSSatish Balay */ 603cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 604cee3aa6bSSatish Balay { 605cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 606cee3aa6bSSatish Balay if (a->M != a->N) 607cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 608cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 609cee3aa6bSSatish Balay } 610cee3aa6bSSatish Balay 611cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 612cee3aa6bSSatish Balay { 613cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 614cee3aa6bSSatish Balay int ierr; 615cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 616cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 617cee3aa6bSSatish Balay return 0; 618cee3aa6bSSatish Balay } 61957b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 62057b952d6SSatish Balay { 62157b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62257b952d6SSatish Balay *m = mat->M; *n = mat->N; 62357b952d6SSatish Balay return 0; 62457b952d6SSatish Balay } 62557b952d6SSatish Balay 62657b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 62757b952d6SSatish Balay { 62857b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62957b952d6SSatish Balay *m = mat->m; *n = mat->N; 63057b952d6SSatish Balay return 0; 63157b952d6SSatish Balay } 63257b952d6SSatish Balay 63357b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 63457b952d6SSatish Balay { 63557b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 63657b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 63757b952d6SSatish Balay return 0; 63857b952d6SSatish Balay } 63957b952d6SSatish Balay 640acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 641acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 642acdf5bf4SSatish Balay 643acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 644acdf5bf4SSatish Balay { 645acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 646acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 647acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 648d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 649d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 650acdf5bf4SSatish Balay 651acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 652acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 653acdf5bf4SSatish Balay 654acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 655acdf5bf4SSatish Balay /* 656acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 657acdf5bf4SSatish Balay */ 658acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 659bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 660bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 661acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 662acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 663acdf5bf4SSatish Balay } 664acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 665acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 666acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 667acdf5bf4SSatish Balay } 668acdf5bf4SSatish Balay 669acdf5bf4SSatish Balay 670d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 671d9d09a02SSatish Balay lrow = row - brstart; 672acdf5bf4SSatish Balay 673acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 674acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 675acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 676acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 677acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 678acdf5bf4SSatish Balay nztot = nzA + nzB; 679acdf5bf4SSatish Balay 680acdf5bf4SSatish Balay cmap = mat->garray; 681acdf5bf4SSatish Balay if (v || idx) { 682acdf5bf4SSatish Balay if (nztot) { 683acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 684acdf5bf4SSatish Balay int imark = -1; 685acdf5bf4SSatish Balay if (v) { 686acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 687acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 688d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 689acdf5bf4SSatish Balay else break; 690acdf5bf4SSatish Balay } 691acdf5bf4SSatish Balay imark = i; 692acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 693acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 694acdf5bf4SSatish Balay } 695acdf5bf4SSatish Balay if (idx) { 696acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 697acdf5bf4SSatish Balay if (imark > -1) { 698acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 699bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 700acdf5bf4SSatish Balay } 701acdf5bf4SSatish Balay } else { 702acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 703d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 704d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 705acdf5bf4SSatish Balay else break; 706acdf5bf4SSatish Balay } 707acdf5bf4SSatish Balay imark = i; 708acdf5bf4SSatish Balay } 709d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 710d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 711acdf5bf4SSatish Balay } 712acdf5bf4SSatish Balay } 713d212a18eSSatish Balay else { 714d212a18eSSatish Balay if (idx) *idx = 0; 715d212a18eSSatish Balay if (v) *v = 0; 716d212a18eSSatish Balay } 717acdf5bf4SSatish Balay } 718acdf5bf4SSatish Balay *nz = nztot; 719acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 720acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 721acdf5bf4SSatish Balay return 0; 722acdf5bf4SSatish Balay } 723acdf5bf4SSatish Balay 724acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 725acdf5bf4SSatish Balay { 726acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 727acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 728acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 729acdf5bf4SSatish Balay } 730acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 731acdf5bf4SSatish Balay return 0; 732acdf5bf4SSatish Balay } 733acdf5bf4SSatish Balay 7345a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat, int *bs) 7355a838052SSatish Balay { 7365a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 7375a838052SSatish Balay *bs = baij->bs; 7385a838052SSatish Balay return 0; 7395a838052SSatish Balay } 7405a838052SSatish Balay 74158667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A) 74258667388SSatish Balay { 74358667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 74458667388SSatish Balay int ierr; 74558667388SSatish Balay ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 74658667388SSatish Balay ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 74758667388SSatish Balay return 0; 74858667388SSatish Balay } 74958667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op) 75058667388SSatish Balay { 75158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 75258667388SSatish Balay 75358667388SSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 75458667388SSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 75558667388SSatish Balay op == MAT_COLUMNS_SORTED || 75658667388SSatish Balay op == MAT_ROW_ORIENTED) { 75758667388SSatish Balay MatSetOption(a->A,op); 75858667388SSatish Balay MatSetOption(a->B,op); 75958667388SSatish Balay } 76058667388SSatish Balay else if (op == MAT_ROWS_SORTED || 76158667388SSatish Balay op == MAT_SYMMETRIC || 76258667388SSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 76358667388SSatish Balay op == MAT_YES_NEW_DIAGONALS) 76458667388SSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 76558667388SSatish Balay else if (op == MAT_COLUMN_ORIENTED) { 76658667388SSatish Balay a->roworiented = 0; 76758667388SSatish Balay MatSetOption(a->A,op); 76858667388SSatish Balay MatSetOption(a->B,op); 76958667388SSatish Balay } 77058667388SSatish Balay else if (op == MAT_NO_NEW_DIAGONALS) 77158667388SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");} 77258667388SSatish Balay else 77358667388SSatish Balay {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");} 77458667388SSatish Balay return 0; 77558667388SSatish Balay } 77658667388SSatish Balay 77779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 77879bdfe76SSatish Balay static struct _MatOps MatOps = { 779bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 780*93bc47c4SSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,MatSolve_MPIBAIJ, 781*93bc47c4SSatish Balay MatSolveAdd_MPIBAIJ,MatSolveTrans_MPIBAIJ,MatSolveTransAdd_MPIBAIJ,MatLUFactor_MPIBAIJ, 78279bdfe76SSatish Balay 0,0,0,0, 783acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 78458667388SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ, 785*93bc47c4SSatish Balay MatZeroEntries_MPIBAIJ,0,MatGetReordering_MPIBAIJ,MatLUFactorSymbolic_MPIBAIJ, 786*93bc47c4SSatish Balay MatLUFactorNumeric_MPIBAIJ,0,0,MatGetSize_MPIBAIJ, 787*93bc47c4SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,MatILUFactorSymbolic_MPIBAIJ,0, 78879bdfe76SSatish Balay 0,0,0,0, 78979bdfe76SSatish Balay 0,0,0,0, 790d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 791d212a18eSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,0, 7925a838052SSatish Balay MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ}; 79379bdfe76SSatish Balay 79479bdfe76SSatish Balay 79579bdfe76SSatish Balay /*@C 79679bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 79779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 79879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 79979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 80079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 80179bdfe76SSatish Balay 80279bdfe76SSatish Balay Input Parameters: 80379bdfe76SSatish Balay . comm - MPI communicator 80479bdfe76SSatish Balay . bs - size of blockk 80579bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 80692e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 80792e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 80892e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 80992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 81092e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 81179bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 81292e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 81379bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 81479bdfe76SSatish Balay submatrix (same for all local rows) 81592e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 81692e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 81792e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 81892e8d321SLois Curfman McInnes it is zero. 81992e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 82079bdfe76SSatish Balay submatrix (same for all local rows). 82192e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 82292e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 82392e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 82479bdfe76SSatish Balay 82579bdfe76SSatish Balay Output Parameter: 82679bdfe76SSatish Balay . A - the matrix 82779bdfe76SSatish Balay 82879bdfe76SSatish Balay Notes: 82979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 83079bdfe76SSatish Balay (possibly both). 83179bdfe76SSatish Balay 83279bdfe76SSatish Balay Storage Information: 83379bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 83479bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 83579bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 83679bdfe76SSatish Balay local matrix (a rectangular submatrix). 83779bdfe76SSatish Balay 83879bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 83979bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 84079bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 84179bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 84279bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 84379bdfe76SSatish Balay 84479bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 84579bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 84679bdfe76SSatish Balay 84779bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 84879bdfe76SSatish Balay $ ------------------- 84979bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 85079bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 85179bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 85279bdfe76SSatish Balay $ ------------------- 85379bdfe76SSatish Balay $ 85479bdfe76SSatish Balay 85579bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 85679bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 85779bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 85857b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 85979bdfe76SSatish Balay 86079bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 86179bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 86279bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 86392e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 86492e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 86592e8d321SLois Curfman McInnes matrices and the file $(PETSC_DIR)/Performance. 86679bdfe76SSatish Balay 86792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 86879bdfe76SSatish Balay 86979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 87079bdfe76SSatish Balay @*/ 87179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 87279bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 87379bdfe76SSatish Balay { 87479bdfe76SSatish Balay Mat B; 87579bdfe76SSatish Balay Mat_MPIBAIJ *b; 876cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 87779bdfe76SSatish Balay 87879bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 87979bdfe76SSatish Balay *A = 0; 88079bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 88179bdfe76SSatish Balay PLogObjectCreate(B); 88279bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 88379bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 88479bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 88579bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 88679bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 88757b952d6SSatish Balay 88879bdfe76SSatish Balay B->factor = 0; 88979bdfe76SSatish Balay B->assembled = PETSC_FALSE; 89079bdfe76SSatish Balay 89179bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 89279bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 89379bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 89479bdfe76SSatish Balay 89579bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 89657b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 897cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 898cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 899cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 900cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 90179bdfe76SSatish Balay 90279bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 90379bdfe76SSatish Balay work[0] = m; work[1] = n; 90479bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 90579bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 90679bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 90779bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 90879bdfe76SSatish Balay } 90979bdfe76SSatish Balay if (m == PETSC_DECIDE) { 91079bdfe76SSatish Balay Mbs = M/bs; 91179bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 91279bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 91379bdfe76SSatish Balay m = mbs*bs; 91479bdfe76SSatish Balay } 91579bdfe76SSatish Balay if (n == PETSC_DECIDE) { 91679bdfe76SSatish Balay Nbs = N/bs; 91779bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 91879bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 91979bdfe76SSatish Balay n = nbs*bs; 92079bdfe76SSatish Balay } 921cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 92279bdfe76SSatish Balay 92379bdfe76SSatish Balay b->m = m; B->m = m; 92479bdfe76SSatish Balay b->n = n; B->n = n; 92579bdfe76SSatish Balay b->N = N; B->N = N; 92679bdfe76SSatish Balay b->M = M; B->M = M; 92779bdfe76SSatish Balay b->bs = bs; 92879bdfe76SSatish Balay b->bs2 = bs*bs; 92979bdfe76SSatish Balay b->mbs = mbs; 93079bdfe76SSatish Balay b->nbs = nbs; 93179bdfe76SSatish Balay b->Mbs = Mbs; 93279bdfe76SSatish Balay b->Nbs = Nbs; 93379bdfe76SSatish Balay 93479bdfe76SSatish Balay /* build local table of row and column ownerships */ 93579bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 93679bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 93779bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 93879bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 93979bdfe76SSatish Balay b->rowners[0] = 0; 94079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 94179bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 94279bdfe76SSatish Balay } 94379bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 94479bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 94579bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 94679bdfe76SSatish Balay b->cowners[0] = 0; 94779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 94879bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 94979bdfe76SSatish Balay } 95079bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 95179bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 95279bdfe76SSatish Balay 95379bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 95479bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 95579bdfe76SSatish Balay PLogObjectParent(B,b->A); 95679bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 95779bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 95879bdfe76SSatish Balay PLogObjectParent(B,b->B); 95979bdfe76SSatish Balay 96079bdfe76SSatish Balay /* build cache for off array entries formed */ 96179bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 96279bdfe76SSatish Balay b->colmap = 0; 96379bdfe76SSatish Balay b->garray = 0; 96479bdfe76SSatish Balay b->roworiented = 1; 96579bdfe76SSatish Balay 96679bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 96779bdfe76SSatish Balay b->lvec = 0; 96879bdfe76SSatish Balay b->Mvctx = 0; 96979bdfe76SSatish Balay 97079bdfe76SSatish Balay /* stuff for MatGetRow() */ 97179bdfe76SSatish Balay b->rowindices = 0; 97279bdfe76SSatish Balay b->rowvalues = 0; 97379bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 97479bdfe76SSatish Balay 97579bdfe76SSatish Balay *A = B; 97679bdfe76SSatish Balay return 0; 97779bdfe76SSatish Balay } 97857b952d6SSatish Balay 97957b952d6SSatish Balay #include "sys.h" 98057b952d6SSatish Balay 98157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 98257b952d6SSatish Balay { 98357b952d6SSatish Balay Mat A; 98457b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 98557b952d6SSatish Balay Scalar *vals,*buf; 98657b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 98757b952d6SSatish Balay MPI_Status status; 988cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 98957b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 99057b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 99157b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 99257b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 99357b952d6SSatish Balay 99457b952d6SSatish Balay 99557b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 99657b952d6SSatish Balay bs2 = bs*bs; 99757b952d6SSatish Balay 99857b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 99957b952d6SSatish Balay if (!rank) { 100057b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 100157b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 1002cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 100357b952d6SSatish Balay } 100457b952d6SSatish Balay 100557b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 100657b952d6SSatish Balay M = header[1]; N = header[2]; 100757b952d6SSatish Balay 100857b952d6SSatish Balay 100957b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 101057b952d6SSatish Balay 101157b952d6SSatish Balay /* 101257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 101357b952d6SSatish Balay divisible by the blocksize 101457b952d6SSatish Balay */ 101557b952d6SSatish Balay Mbs = M/bs; 101657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 101757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 101857b952d6SSatish Balay else Mbs++; 101957b952d6SSatish Balay if (extra_rows &&!rank) { 102057b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 102157b952d6SSatish Balay } 102257b952d6SSatish Balay /* determine ownership of all rows */ 102357b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 102457b952d6SSatish Balay m = mbs * bs; 1025cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 1026cee3aa6bSSatish Balay browners = rowners + size + 1; 102757b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 102857b952d6SSatish Balay rowners[0] = 0; 1029cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 1030cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 103157b952d6SSatish Balay rstart = rowners[rank]; 103257b952d6SSatish Balay rend = rowners[rank+1]; 103357b952d6SSatish Balay 103457b952d6SSatish Balay /* distribute row lengths to all processors */ 103557b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 103657b952d6SSatish Balay if (!rank) { 103757b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 103857b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 103957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 104057b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1041cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 1042cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 104357b952d6SSatish Balay PetscFree(sndcounts); 104457b952d6SSatish Balay } 104557b952d6SSatish Balay else { 104657b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 104757b952d6SSatish Balay } 104857b952d6SSatish Balay 104957b952d6SSatish Balay if (!rank) { 105057b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 105157b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 105257b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 105357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 105457b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 105557b952d6SSatish Balay procsnz[i] += rowlengths[j]; 105657b952d6SSatish Balay } 105757b952d6SSatish Balay } 105857b952d6SSatish Balay PetscFree(rowlengths); 105957b952d6SSatish Balay 106057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 106157b952d6SSatish Balay maxnz = 0; 106257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 106357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 106457b952d6SSatish Balay } 106557b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 106657b952d6SSatish Balay 106757b952d6SSatish Balay /* read in my part of the matrix column indices */ 106857b952d6SSatish Balay nz = procsnz[0]; 106957b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 107057b952d6SSatish Balay mycols = ibuf; 1071cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 107257b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1073cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1074cee3aa6bSSatish Balay 107557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 107657b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 107757b952d6SSatish Balay nz = procsnz[i]; 107857b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 107957b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 108057b952d6SSatish Balay } 108157b952d6SSatish Balay /* read in the stuff for the last proc */ 108257b952d6SSatish Balay if ( size != 1 ) { 108357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 108457b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 108557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1086cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 108757b952d6SSatish Balay } 108857b952d6SSatish Balay PetscFree(cols); 108957b952d6SSatish Balay } 109057b952d6SSatish Balay else { 109157b952d6SSatish Balay /* determine buffer space needed for message */ 109257b952d6SSatish Balay nz = 0; 109357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 109457b952d6SSatish Balay nz += locrowlens[i]; 109557b952d6SSatish Balay } 109657b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 109757b952d6SSatish Balay mycols = ibuf; 109857b952d6SSatish Balay /* receive message of column indices*/ 109957b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 110057b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1101cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 110257b952d6SSatish Balay } 110357b952d6SSatish Balay 110457b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1105cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1106cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 110757b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1108cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 110957b952d6SSatish Balay masked1 = mask + Mbs; 111057b952d6SSatish Balay masked2 = masked1 + Mbs; 111157b952d6SSatish Balay rowcount = 0; nzcount = 0; 111257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 111357b952d6SSatish Balay dcount = 0; 111457b952d6SSatish Balay odcount = 0; 111557b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 111657b952d6SSatish Balay kmax = locrowlens[rowcount]; 111757b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 111857b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 111957b952d6SSatish Balay if (!mask[tmp]) { 112057b952d6SSatish Balay mask[tmp] = 1; 112157b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 112257b952d6SSatish Balay else masked1[dcount++] = tmp; 112357b952d6SSatish Balay } 112457b952d6SSatish Balay } 112557b952d6SSatish Balay rowcount++; 112657b952d6SSatish Balay } 1127cee3aa6bSSatish Balay 112857b952d6SSatish Balay dlens[i] = dcount; 112957b952d6SSatish Balay odlens[i] = odcount; 1130cee3aa6bSSatish Balay 113157b952d6SSatish Balay /* zero out the mask elements we set */ 113257b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 113357b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 113457b952d6SSatish Balay } 1135cee3aa6bSSatish Balay 113657b952d6SSatish Balay /* create our matrix */ 113757b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 113857b952d6SSatish Balay A = *newmat; 11396d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 114057b952d6SSatish Balay 114157b952d6SSatish Balay if (!rank) { 114257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 114357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 114457b952d6SSatish Balay nz = procsnz[0]; 114557b952d6SSatish Balay vals = buf; 1146cee3aa6bSSatish Balay mycols = ibuf; 1147cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 114857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1149cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 115057b952d6SSatish Balay /* insert into matrix */ 115157b952d6SSatish Balay jj = rstart*bs; 115257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 115357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 115457b952d6SSatish Balay mycols += locrowlens[i]; 115557b952d6SSatish Balay vals += locrowlens[i]; 115657b952d6SSatish Balay jj++; 115757b952d6SSatish Balay } 115857b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 115957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 116057b952d6SSatish Balay nz = procsnz[i]; 116157b952d6SSatish Balay vals = buf; 116257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 116357b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 116457b952d6SSatish Balay } 116557b952d6SSatish Balay /* the last proc */ 116657b952d6SSatish Balay if ( size != 1 ){ 116757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1168cee3aa6bSSatish Balay vals = buf; 116957b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 117057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1171cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 117257b952d6SSatish Balay } 117357b952d6SSatish Balay PetscFree(procsnz); 117457b952d6SSatish Balay } 117557b952d6SSatish Balay else { 117657b952d6SSatish Balay /* receive numeric values */ 117757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 117857b952d6SSatish Balay 117957b952d6SSatish Balay /* receive message of values*/ 118057b952d6SSatish Balay vals = buf; 1181cee3aa6bSSatish Balay mycols = ibuf; 118257b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 118357b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1184cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 118557b952d6SSatish Balay 118657b952d6SSatish Balay /* insert into matrix */ 118757b952d6SSatish Balay jj = rstart*bs; 1188cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 118957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 119057b952d6SSatish Balay mycols += locrowlens[i]; 119157b952d6SSatish Balay vals += locrowlens[i]; 119257b952d6SSatish Balay jj++; 119357b952d6SSatish Balay } 119457b952d6SSatish Balay } 119557b952d6SSatish Balay PetscFree(locrowlens); 119657b952d6SSatish Balay PetscFree(buf); 119757b952d6SSatish Balay PetscFree(ibuf); 119857b952d6SSatish Balay PetscFree(rowners); 119957b952d6SSatish Balay PetscFree(dlens); 1200cee3aa6bSSatish Balay PetscFree(mask); 12016d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 12026d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 120357b952d6SSatish Balay return 0; 120457b952d6SSatish Balay } 120557b952d6SSatish Balay 120657b952d6SSatish Balay 1207