179bdfe76SSatish Balay #ifndef lint 2*5a838052SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.12 1996/07/08 22:20:04 bsmith Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 1557b952d6SSatish Balay 1657b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1857b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1957b952d6SSatish Balay length of colmap equals the global matrix length. 2057b952d6SSatish Balay */ 2157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2257b952d6SSatish Balay { 2357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2457b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 2557b952d6SSatish Balay int nbs = B->nbs,i; 2657b952d6SSatish Balay 2757b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 2857b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 2957b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 3057b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 3157b952d6SSatish Balay return 0; 3257b952d6SSatish Balay } 3357b952d6SSatish Balay 3457b952d6SSatish Balay 35acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 3657b952d6SSatish Balay { 3757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3857b952d6SSatish Balay int ierr; 3957b952d6SSatish Balay if (baij->size == 1) { 4057b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 4157b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 4257b952d6SSatish Balay return 0; 4357b952d6SSatish Balay } 4457b952d6SSatish Balay 4557b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4657b952d6SSatish Balay { 4757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4857b952d6SSatish Balay Scalar value; 4957b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 5057b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 5157b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 5257b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 5357b952d6SSatish Balay 5457b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 5557b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 5657b952d6SSatish Balay } 5757b952d6SSatish Balay baij->insertmode = addv; 5857b952d6SSatish Balay rstart_orig = rstart*bs; 5957b952d6SSatish Balay rend_orig = rend*bs; 6057b952d6SSatish Balay cstart_orig = cstart*bs; 6157b952d6SSatish Balay cend_orig = cend*bs; 6257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 6357b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 6457b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 6557b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 6657b952d6SSatish Balay row = im[i] - rstart_orig; 6757b952d6SSatish Balay for ( j=0; j<n; j++ ) { 6857b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 6957b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 7057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 7157b952d6SSatish Balay col = in[j] - cstart_orig; 7257b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 7357b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 7457b952d6SSatish Balay } 7557b952d6SSatish Balay else { 7657b952d6SSatish Balay if (mat->was_assembled) { 7757b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 7857b952d6SSatish Balay col = baij->colmap[in[j]]; 7957b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 8057b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 8157b952d6SSatish Balay col = in[j]; 8257b952d6SSatish Balay } 8357b952d6SSatish Balay } 8457b952d6SSatish Balay else col = in[j]; 8557b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8657b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8757b952d6SSatish Balay } 8857b952d6SSatish Balay } 8957b952d6SSatish Balay } 9057b952d6SSatish Balay else { 9157b952d6SSatish Balay if (roworiented) { 9257b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 9357b952d6SSatish Balay } 9457b952d6SSatish Balay else { 9557b952d6SSatish Balay row = im[i]; 9657b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9757b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 9857b952d6SSatish Balay } 9957b952d6SSatish Balay } 10057b952d6SSatish Balay } 10157b952d6SSatish Balay } 10257b952d6SSatish Balay return 0; 10357b952d6SSatish Balay } 10457b952d6SSatish Balay 105d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 106d6de1c52SSatish Balay { 107d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 108d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 109d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 110d6de1c52SSatish Balay 111d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 112d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 113d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 114d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 115d6de1c52SSatish Balay row = idxm[i] - bsrstart; 116d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 117d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 118d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 119d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 120d6de1c52SSatish Balay col = idxn[j] - bscstart; 121d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 122d6de1c52SSatish Balay } 123d6de1c52SSatish Balay else { 124acdf5bf4SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 125d9d09a02SSatish Balay if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0; 126d9d09a02SSatish Balay else { 127d9d09a02SSatish Balay col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs; 128d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 129d6de1c52SSatish Balay } 130d6de1c52SSatish Balay } 131d6de1c52SSatish Balay } 132d9d09a02SSatish Balay } 133d6de1c52SSatish Balay else { 134d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 135d6de1c52SSatish Balay } 136d6de1c52SSatish Balay } 137d6de1c52SSatish Balay return 0; 138d6de1c52SSatish Balay } 139d6de1c52SSatish Balay 140d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 141d6de1c52SSatish Balay { 142d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 143d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 144acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 145d6de1c52SSatish Balay double sum = 0.0; 146d6de1c52SSatish Balay Scalar *v; 147d6de1c52SSatish Balay 148d6de1c52SSatish Balay if (baij->size == 1) { 149d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 150d6de1c52SSatish Balay } else { 151d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 152d6de1c52SSatish Balay v = amat->a; 153d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 154d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 155d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 156d6de1c52SSatish Balay #else 157d6de1c52SSatish Balay sum += (*v)*(*v); v++; 158d6de1c52SSatish Balay #endif 159d6de1c52SSatish Balay } 160d6de1c52SSatish Balay v = bmat->a; 161d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 162d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 163d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 164d6de1c52SSatish Balay #else 165d6de1c52SSatish Balay sum += (*v)*(*v); v++; 166d6de1c52SSatish Balay #endif 167d6de1c52SSatish Balay } 168d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 169d6de1c52SSatish Balay *norm = sqrt(*norm); 170d6de1c52SSatish Balay } 171acdf5bf4SSatish Balay else 172acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 173d6de1c52SSatish Balay } 174d6de1c52SSatish Balay return 0; 175d6de1c52SSatish Balay } 17657b952d6SSatish Balay 17757b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 17857b952d6SSatish Balay { 17957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 18057b952d6SSatish Balay MPI_Comm comm = mat->comm; 18157b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 18257b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 18357b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 18457b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 18557b952d6SSatish Balay InsertMode addv; 18657b952d6SSatish Balay Scalar *rvalues,*svalues; 18757b952d6SSatish Balay 18857b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 18957b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 19057b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 19157b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 19257b952d6SSatish Balay } 19357b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 19457b952d6SSatish Balay 19557b952d6SSatish Balay /* first count number of contributors to each processor */ 19657b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 19757b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 19857b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 19957b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 20057b952d6SSatish Balay idx = baij->stash.idx[i]; 20157b952d6SSatish Balay for ( j=0; j<size; j++ ) { 20257b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 20357b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 20457b952d6SSatish Balay } 20557b952d6SSatish Balay } 20657b952d6SSatish Balay } 20757b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 20857b952d6SSatish Balay 20957b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 21057b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 21157b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 21257b952d6SSatish Balay nreceives = work[rank]; 21357b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 21457b952d6SSatish Balay nmax = work[rank]; 21557b952d6SSatish Balay PetscFree(work); 21657b952d6SSatish Balay 21757b952d6SSatish Balay /* post receives: 21857b952d6SSatish Balay 1) each message will consist of ordered pairs 21957b952d6SSatish Balay (global index,value) we store the global index as a double 22057b952d6SSatish Balay to simplify the message passing. 22157b952d6SSatish Balay 2) since we don't know how long each individual message is we 22257b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 22357b952d6SSatish Balay this is a lot of wasted space. 22457b952d6SSatish Balay 22557b952d6SSatish Balay 22657b952d6SSatish Balay This could be done better. 22757b952d6SSatish Balay */ 22857b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 22957b952d6SSatish Balay CHKPTRQ(rvalues); 23057b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 23157b952d6SSatish Balay CHKPTRQ(recv_waits); 23257b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 23357b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 23457b952d6SSatish Balay comm,recv_waits+i); 23557b952d6SSatish Balay } 23657b952d6SSatish Balay 23757b952d6SSatish Balay /* do sends: 23857b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 23957b952d6SSatish Balay the ith processor 24057b952d6SSatish Balay */ 24157b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 24257b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 24357b952d6SSatish Balay CHKPTRQ(send_waits); 24457b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 24557b952d6SSatish Balay starts[0] = 0; 24657b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 24757b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 24857b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 24957b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 25057b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 25157b952d6SSatish Balay } 25257b952d6SSatish Balay PetscFree(owner); 25357b952d6SSatish Balay starts[0] = 0; 25457b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 25557b952d6SSatish Balay count = 0; 25657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 25757b952d6SSatish Balay if (procs[i]) { 25857b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 25957b952d6SSatish Balay comm,send_waits+count++); 26057b952d6SSatish Balay } 26157b952d6SSatish Balay } 26257b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 26357b952d6SSatish Balay 26457b952d6SSatish Balay /* Free cache space */ 26557b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 26657b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 26757b952d6SSatish Balay 26857b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 26957b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 27057b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 27157b952d6SSatish Balay baij->rmax = nmax; 27257b952d6SSatish Balay 27357b952d6SSatish Balay return 0; 27457b952d6SSatish Balay } 27557b952d6SSatish Balay 27657b952d6SSatish Balay 27757b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 27857b952d6SSatish Balay { 27957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 28057b952d6SSatish Balay MPI_Status *send_status,recv_status; 28157b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 28257b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 28357b952d6SSatish Balay Scalar *values,val; 28457b952d6SSatish Balay InsertMode addv = baij->insertmode; 28557b952d6SSatish Balay 28657b952d6SSatish Balay /* wait on receives */ 28757b952d6SSatish Balay while (count) { 28857b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 28957b952d6SSatish Balay /* unpack receives into our local space */ 29057b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 29157b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 29257b952d6SSatish Balay n = n/3; 29357b952d6SSatish Balay for ( i=0; i<n; i++ ) { 29457b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 29557b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 29657b952d6SSatish Balay val = values[3*i+2]; 29757b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 29857b952d6SSatish Balay col -= baij->cstart*bs; 29957b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 30057b952d6SSatish Balay } 30157b952d6SSatish Balay else { 30257b952d6SSatish Balay if (mat->was_assembled) { 30357b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 30457b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 30557b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 30657b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 30757b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 30857b952d6SSatish Balay } 30957b952d6SSatish Balay } 31057b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 31157b952d6SSatish Balay } 31257b952d6SSatish Balay } 31357b952d6SSatish Balay count--; 31457b952d6SSatish Balay } 31557b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 31657b952d6SSatish Balay 31757b952d6SSatish Balay /* wait on sends */ 31857b952d6SSatish Balay if (baij->nsends) { 31957b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 32057b952d6SSatish Balay CHKPTRQ(send_status); 32157b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 32257b952d6SSatish Balay PetscFree(send_status); 32357b952d6SSatish Balay } 32457b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 32557b952d6SSatish Balay 32657b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 32757b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 32857b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 32957b952d6SSatish Balay 33057b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 33157b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 33257b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 33357b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 33457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 33557b952d6SSatish Balay } 33657b952d6SSatish Balay 3376d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 33857b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 33957b952d6SSatish Balay } 34057b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 34157b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 34257b952d6SSatish Balay 34357b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 34457b952d6SSatish Balay return 0; 34557b952d6SSatish Balay } 34657b952d6SSatish Balay 34757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 34857b952d6SSatish Balay { 34957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 35057b952d6SSatish Balay int ierr; 35157b952d6SSatish Balay 35257b952d6SSatish Balay if (baij->size == 1) { 35357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 35457b952d6SSatish Balay } 35557b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 35657b952d6SSatish Balay return 0; 35757b952d6SSatish Balay } 35857b952d6SSatish Balay 35957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 36057b952d6SSatish Balay { 36157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 362cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 36357b952d6SSatish Balay FILE *fd; 36457b952d6SSatish Balay ViewerType vtype; 36557b952d6SSatish Balay 36657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36757b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 36857b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 36957b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 37057b952d6SSatish Balay int nz, nzalloc, mem; 37157b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 37257b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 37357b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 37457b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 37557b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 37657b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 37757b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 37857b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 37957b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 38057b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 38157b952d6SSatish Balay fflush(fd); 38257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 38357b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 38457b952d6SSatish Balay return 0; 38557b952d6SSatish Balay } 38657b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 38757b952d6SSatish Balay return 0; 38857b952d6SSatish Balay } 38957b952d6SSatish Balay } 39057b952d6SSatish Balay 39157b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 39257b952d6SSatish Balay Draw draw; 39357b952d6SSatish Balay PetscTruth isnull; 39457b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 39557b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 39657b952d6SSatish Balay } 39757b952d6SSatish Balay 39857b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 39957b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 40057b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 40157b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 40257b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 40357b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 40457b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 40557b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 40657b952d6SSatish Balay fflush(fd); 40757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 40857b952d6SSatish Balay } 40957b952d6SSatish Balay else { 41057b952d6SSatish Balay int size = baij->size; 41157b952d6SSatish Balay rank = baij->rank; 41257b952d6SSatish Balay if (size == 1) { 41357b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 41457b952d6SSatish Balay } 41557b952d6SSatish Balay else { 41657b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 41757b952d6SSatish Balay Mat A; 41857b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 41957b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 42057b952d6SSatish Balay int mbs=baij->mbs; 42157b952d6SSatish Balay Scalar *a; 42257b952d6SSatish Balay 42357b952d6SSatish Balay if (!rank) { 424cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42557b952d6SSatish Balay CHKERRQ(ierr); 42657b952d6SSatish Balay } 42757b952d6SSatish Balay else { 428cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42957b952d6SSatish Balay CHKERRQ(ierr); 43057b952d6SSatish Balay } 43157b952d6SSatish Balay PLogObjectParent(mat,A); 43257b952d6SSatish Balay 43357b952d6SSatish Balay /* copy over the A part */ 43457b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 43557b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 43657b952d6SSatish Balay row = baij->rstart; 43757b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 43857b952d6SSatish Balay 43957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 44057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 44157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 44257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 44357b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 44457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 445cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 446cee3aa6bSSatish Balay col++; a += bs; 44757b952d6SSatish Balay } 44857b952d6SSatish Balay } 44957b952d6SSatish Balay } 45057b952d6SSatish Balay /* copy over the B part */ 45157b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 45257b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 45357b952d6SSatish Balay row = baij->rstart*bs; 45457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 45557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 45657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 45757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 45857b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 45957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 460cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 461cee3aa6bSSatish Balay col++; a += bs; 46257b952d6SSatish Balay } 46357b952d6SSatish Balay } 46457b952d6SSatish Balay } 46557b952d6SSatish Balay PetscFree(rvals); 4666d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 4676d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 46857b952d6SSatish Balay if (!rank) { 46957b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 47057b952d6SSatish Balay } 47157b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 47257b952d6SSatish Balay } 47357b952d6SSatish Balay } 47457b952d6SSatish Balay return 0; 47557b952d6SSatish Balay } 47657b952d6SSatish Balay 47757b952d6SSatish Balay 47857b952d6SSatish Balay 47957b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 48057b952d6SSatish Balay { 48157b952d6SSatish Balay Mat mat = (Mat) obj; 48257b952d6SSatish Balay int ierr; 48357b952d6SSatish Balay ViewerType vtype; 48457b952d6SSatish Balay 48557b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 48657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 48757b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 48857b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 48957b952d6SSatish Balay } 49057b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 49157b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 49257b952d6SSatish Balay } 49357b952d6SSatish Balay return 0; 49457b952d6SSatish Balay } 49557b952d6SSatish Balay 49679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 49779bdfe76SSatish Balay { 49879bdfe76SSatish Balay Mat mat = (Mat) obj; 49979bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 50079bdfe76SSatish Balay int ierr; 50179bdfe76SSatish Balay 50279bdfe76SSatish Balay #if defined(PETSC_LOG) 50379bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 50479bdfe76SSatish Balay #endif 50579bdfe76SSatish Balay 50679bdfe76SSatish Balay PetscFree(baij->rowners); 50779bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 50879bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 50979bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 51079bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 51179bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 51279bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 51379bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 51479bdfe76SSatish Balay PetscFree(baij); 51579bdfe76SSatish Balay PLogObjectDestroy(mat); 51679bdfe76SSatish Balay PetscHeaderDestroy(mat); 51779bdfe76SSatish Balay return 0; 51879bdfe76SSatish Balay } 51979bdfe76SSatish Balay 520cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 521cee3aa6bSSatish Balay { 522cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 52347b4a8eaSLois Curfman McInnes int ierr, nt; 524cee3aa6bSSatish Balay 52547b4a8eaSLois Curfman McInnes ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 52647b4a8eaSLois Curfman McInnes if (nt != a->n) { 52747b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx"); 52847b4a8eaSLois Curfman McInnes } 52947b4a8eaSLois Curfman McInnes ierr = VecGetLocalSize(yy,&nt); CHKERRQ(ierr); 53047b4a8eaSLois Curfman McInnes if (nt != a->m) { 53147b4a8eaSLois Curfman McInnes SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy"); 53247b4a8eaSLois Curfman McInnes } 533cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 534cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 535cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 536cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 537cee3aa6bSSatish Balay return 0; 538cee3aa6bSSatish Balay } 539cee3aa6bSSatish Balay 540cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 541cee3aa6bSSatish Balay { 542cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 543cee3aa6bSSatish Balay int ierr; 544cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 545cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 546cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 547cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 548cee3aa6bSSatish Balay return 0; 549cee3aa6bSSatish Balay } 550cee3aa6bSSatish Balay 551cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 552cee3aa6bSSatish Balay { 553cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 554cee3aa6bSSatish Balay int ierr; 555cee3aa6bSSatish Balay 556cee3aa6bSSatish Balay /* do nondiagonal part */ 557cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 558cee3aa6bSSatish Balay /* send it on its way */ 559cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 560cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 561cee3aa6bSSatish Balay /* do local part */ 562cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 563cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 564cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 565cee3aa6bSSatish Balay /* but is not perhaps always true. */ 566cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 567cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 568cee3aa6bSSatish Balay return 0; 569cee3aa6bSSatish Balay } 570cee3aa6bSSatish Balay 571cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 572cee3aa6bSSatish Balay { 573cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 574cee3aa6bSSatish Balay int ierr; 575cee3aa6bSSatish Balay 576cee3aa6bSSatish Balay /* do nondiagonal part */ 577cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 578cee3aa6bSSatish Balay /* send it on its way */ 579cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 580cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 581cee3aa6bSSatish Balay /* do local part */ 582cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 583cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 584cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 585cee3aa6bSSatish Balay /* but is not perhaps always true. */ 586cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 587cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 588cee3aa6bSSatish Balay return 0; 589cee3aa6bSSatish Balay } 590cee3aa6bSSatish Balay 591cee3aa6bSSatish Balay /* 592cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 593cee3aa6bSSatish Balay diagonal block 594cee3aa6bSSatish Balay */ 595cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 596cee3aa6bSSatish Balay { 597cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 598cee3aa6bSSatish Balay if (a->M != a->N) 599cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 600cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 601cee3aa6bSSatish Balay } 602cee3aa6bSSatish Balay 603cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 604cee3aa6bSSatish Balay { 605cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 606cee3aa6bSSatish Balay int ierr; 607cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 608cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 609cee3aa6bSSatish Balay return 0; 610cee3aa6bSSatish Balay } 61157b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 61257b952d6SSatish Balay { 61357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 61457b952d6SSatish Balay *m = mat->M; *n = mat->N; 61557b952d6SSatish Balay return 0; 61657b952d6SSatish Balay } 61757b952d6SSatish Balay 61857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 61957b952d6SSatish Balay { 62057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62157b952d6SSatish Balay *m = mat->m; *n = mat->N; 62257b952d6SSatish Balay return 0; 62357b952d6SSatish Balay } 62457b952d6SSatish Balay 62557b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 62657b952d6SSatish Balay { 62757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62857b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 62957b952d6SSatish Balay return 0; 63057b952d6SSatish Balay } 63157b952d6SSatish Balay 632acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 633acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 634acdf5bf4SSatish Balay 635acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 636acdf5bf4SSatish Balay { 637acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 638acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 639acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 640d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 641d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 642acdf5bf4SSatish Balay 643acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 644acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 645acdf5bf4SSatish Balay 646acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 647acdf5bf4SSatish Balay /* 648acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 649acdf5bf4SSatish Balay */ 650acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 651bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 652bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 653acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 654acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 655acdf5bf4SSatish Balay } 656acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 657acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 658acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 659acdf5bf4SSatish Balay } 660acdf5bf4SSatish Balay 661acdf5bf4SSatish Balay 662d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 663d9d09a02SSatish Balay lrow = row - brstart; 664acdf5bf4SSatish Balay 665acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 666acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 667acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 668acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 669acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 670acdf5bf4SSatish Balay nztot = nzA + nzB; 671acdf5bf4SSatish Balay 672acdf5bf4SSatish Balay cmap = mat->garray; 673acdf5bf4SSatish Balay if (v || idx) { 674acdf5bf4SSatish Balay if (nztot) { 675acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 676acdf5bf4SSatish Balay int imark = -1; 677acdf5bf4SSatish Balay if (v) { 678acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 679acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 680d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 681acdf5bf4SSatish Balay else break; 682acdf5bf4SSatish Balay } 683acdf5bf4SSatish Balay imark = i; 684acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 685acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 686acdf5bf4SSatish Balay } 687acdf5bf4SSatish Balay if (idx) { 688acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 689acdf5bf4SSatish Balay if (imark > -1) { 690acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 691bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 692acdf5bf4SSatish Balay } 693acdf5bf4SSatish Balay } else { 694acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 695d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 696d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 697acdf5bf4SSatish Balay else break; 698acdf5bf4SSatish Balay } 699acdf5bf4SSatish Balay imark = i; 700acdf5bf4SSatish Balay } 701d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 702d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 703acdf5bf4SSatish Balay } 704acdf5bf4SSatish Balay } 705d212a18eSSatish Balay else { 706d212a18eSSatish Balay if (idx) *idx = 0; 707d212a18eSSatish Balay if (v) *v = 0; 708d212a18eSSatish Balay } 709acdf5bf4SSatish Balay } 710acdf5bf4SSatish Balay *nz = nztot; 711acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 712acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 713acdf5bf4SSatish Balay return 0; 714acdf5bf4SSatish Balay } 715acdf5bf4SSatish Balay 716acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 717acdf5bf4SSatish Balay { 718acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 719acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 720acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 721acdf5bf4SSatish Balay } 722acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 723acdf5bf4SSatish Balay return 0; 724acdf5bf4SSatish Balay } 725acdf5bf4SSatish Balay 726*5a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat, int *bs) 727*5a838052SSatish Balay { 728*5a838052SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 729*5a838052SSatish Balay *bs = baij->bs; 730*5a838052SSatish Balay return 0; 731*5a838052SSatish Balay } 732*5a838052SSatish Balay 73379bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 73479bdfe76SSatish Balay static struct _MatOps MatOps = { 735bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 736cee3aa6bSSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 73779bdfe76SSatish Balay 0,0,0,0, 73879bdfe76SSatish Balay 0,0,0,0, 739acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 74057b952d6SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 74157b952d6SSatish Balay 0,0,MatGetReordering_MPIBAIJ,0, 74257b952d6SSatish Balay 0,0,0,MatGetSize_MPIBAIJ, 74357b952d6SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 74479bdfe76SSatish Balay 0,0,0,0, 74579bdfe76SSatish Balay 0,0,0,0, 746d212a18eSSatish Balay 0,0,0,MatGetSubMatrices_MPIBAIJ, 747d212a18eSSatish Balay MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,0, 748*5a838052SSatish Balay MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ}; 74979bdfe76SSatish Balay 75079bdfe76SSatish Balay 75179bdfe76SSatish Balay /*@C 75279bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 75379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 75479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 75579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 75679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 75779bdfe76SSatish Balay 75879bdfe76SSatish Balay Input Parameters: 75979bdfe76SSatish Balay . comm - MPI communicator 76079bdfe76SSatish Balay . bs - size of blockk 76179bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 76292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 76392e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 76492e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 76592e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 76692e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 76779bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 76892e8d321SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 76979bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 77079bdfe76SSatish Balay submatrix (same for all local rows) 77192e8d321SLois Curfman McInnes . d_nzz - array containing the number of block nonzeros in the various block rows 77292e8d321SLois Curfman McInnes of the in diagonal portion of the local (possibly different for each block 77392e8d321SLois Curfman McInnes row) or PETSC_NULL. You must leave room for the diagonal entry even if 77492e8d321SLois Curfman McInnes it is zero. 77592e8d321SLois Curfman McInnes . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 77679bdfe76SSatish Balay submatrix (same for all local rows). 77792e8d321SLois Curfman McInnes . o_nzz - array containing the number of nonzeros in the various block rows of the 77892e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 77992e8d321SLois Curfman McInnes each block row) or PETSC_NULL. 78079bdfe76SSatish Balay 78179bdfe76SSatish Balay Output Parameter: 78279bdfe76SSatish Balay . A - the matrix 78379bdfe76SSatish Balay 78479bdfe76SSatish Balay Notes: 78579bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 78679bdfe76SSatish Balay (possibly both). 78779bdfe76SSatish Balay 78879bdfe76SSatish Balay Storage Information: 78979bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 79079bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 79179bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 79279bdfe76SSatish Balay local matrix (a rectangular submatrix). 79379bdfe76SSatish Balay 79479bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 79579bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 79679bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 79779bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 79879bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 79979bdfe76SSatish Balay 80079bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 80179bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 80279bdfe76SSatish Balay 80379bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 80479bdfe76SSatish Balay $ ------------------- 80579bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 80679bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 80779bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 80879bdfe76SSatish Balay $ ------------------- 80979bdfe76SSatish Balay $ 81079bdfe76SSatish Balay 81179bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 81279bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 81379bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 81457b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 81579bdfe76SSatish Balay 81679bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 81779bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 81879bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 81992e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 82092e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 82192e8d321SLois Curfman McInnes matrices and the file $(PETSC_DIR)/Performance. 82279bdfe76SSatish Balay 82392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 82479bdfe76SSatish Balay 82579bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 82679bdfe76SSatish Balay @*/ 82779bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 82879bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 82979bdfe76SSatish Balay { 83079bdfe76SSatish Balay Mat B; 83179bdfe76SSatish Balay Mat_MPIBAIJ *b; 832cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 83379bdfe76SSatish Balay 83479bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 83579bdfe76SSatish Balay *A = 0; 83679bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 83779bdfe76SSatish Balay PLogObjectCreate(B); 83879bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 83979bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 84079bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 84179bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 84279bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 84357b952d6SSatish Balay 84479bdfe76SSatish Balay B->factor = 0; 84579bdfe76SSatish Balay B->assembled = PETSC_FALSE; 84679bdfe76SSatish Balay 84779bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 84879bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 84979bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 85079bdfe76SSatish Balay 85179bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 85257b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 853cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 854cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 855cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 856cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 85779bdfe76SSatish Balay 85879bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 85979bdfe76SSatish Balay work[0] = m; work[1] = n; 86079bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 86179bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 86279bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 86379bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 86479bdfe76SSatish Balay } 86579bdfe76SSatish Balay if (m == PETSC_DECIDE) { 86679bdfe76SSatish Balay Mbs = M/bs; 86779bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 86879bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 86979bdfe76SSatish Balay m = mbs*bs; 87079bdfe76SSatish Balay } 87179bdfe76SSatish Balay if (n == PETSC_DECIDE) { 87279bdfe76SSatish Balay Nbs = N/bs; 87379bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 87479bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 87579bdfe76SSatish Balay n = nbs*bs; 87679bdfe76SSatish Balay } 877cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 87879bdfe76SSatish Balay 87979bdfe76SSatish Balay b->m = m; B->m = m; 88079bdfe76SSatish Balay b->n = n; B->n = n; 88179bdfe76SSatish Balay b->N = N; B->N = N; 88279bdfe76SSatish Balay b->M = M; B->M = M; 88379bdfe76SSatish Balay b->bs = bs; 88479bdfe76SSatish Balay b->bs2 = bs*bs; 88579bdfe76SSatish Balay b->mbs = mbs; 88679bdfe76SSatish Balay b->nbs = nbs; 88779bdfe76SSatish Balay b->Mbs = Mbs; 88879bdfe76SSatish Balay b->Nbs = Nbs; 88979bdfe76SSatish Balay 89079bdfe76SSatish Balay /* build local table of row and column ownerships */ 89179bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 89279bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 89379bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 89479bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 89579bdfe76SSatish Balay b->rowners[0] = 0; 89679bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 89779bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 89879bdfe76SSatish Balay } 89979bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 90079bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 90179bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 90279bdfe76SSatish Balay b->cowners[0] = 0; 90379bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 90479bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 90579bdfe76SSatish Balay } 90679bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 90779bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 90879bdfe76SSatish Balay 90979bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 91079bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 91179bdfe76SSatish Balay PLogObjectParent(B,b->A); 91279bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 91379bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 91479bdfe76SSatish Balay PLogObjectParent(B,b->B); 91579bdfe76SSatish Balay 91679bdfe76SSatish Balay /* build cache for off array entries formed */ 91779bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 91879bdfe76SSatish Balay b->colmap = 0; 91979bdfe76SSatish Balay b->garray = 0; 92079bdfe76SSatish Balay b->roworiented = 1; 92179bdfe76SSatish Balay 92279bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 92379bdfe76SSatish Balay b->lvec = 0; 92479bdfe76SSatish Balay b->Mvctx = 0; 92579bdfe76SSatish Balay 92679bdfe76SSatish Balay /* stuff for MatGetRow() */ 92779bdfe76SSatish Balay b->rowindices = 0; 92879bdfe76SSatish Balay b->rowvalues = 0; 92979bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 93079bdfe76SSatish Balay 93179bdfe76SSatish Balay *A = B; 93279bdfe76SSatish Balay return 0; 93379bdfe76SSatish Balay } 93457b952d6SSatish Balay 93557b952d6SSatish Balay #include "sys.h" 93657b952d6SSatish Balay 93757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 93857b952d6SSatish Balay { 93957b952d6SSatish Balay Mat A; 94057b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 94157b952d6SSatish Balay Scalar *vals,*buf; 94257b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 94357b952d6SSatish Balay MPI_Status status; 944cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 94557b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 94657b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 94757b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 94857b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 94957b952d6SSatish Balay 95057b952d6SSatish Balay 95157b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 95257b952d6SSatish Balay bs2 = bs*bs; 95357b952d6SSatish Balay 95457b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 95557b952d6SSatish Balay if (!rank) { 95657b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 95757b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 958cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 95957b952d6SSatish Balay } 96057b952d6SSatish Balay 96157b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 96257b952d6SSatish Balay M = header[1]; N = header[2]; 96357b952d6SSatish Balay 96457b952d6SSatish Balay 96557b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 96657b952d6SSatish Balay 96757b952d6SSatish Balay /* 96857b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 96957b952d6SSatish Balay divisible by the blocksize 97057b952d6SSatish Balay */ 97157b952d6SSatish Balay Mbs = M/bs; 97257b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 97357b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 97457b952d6SSatish Balay else Mbs++; 97557b952d6SSatish Balay if (extra_rows &&!rank) { 97657b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 97757b952d6SSatish Balay } 97857b952d6SSatish Balay /* determine ownership of all rows */ 97957b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 98057b952d6SSatish Balay m = mbs * bs; 981cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 982cee3aa6bSSatish Balay browners = rowners + size + 1; 98357b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 98457b952d6SSatish Balay rowners[0] = 0; 985cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 986cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 98757b952d6SSatish Balay rstart = rowners[rank]; 98857b952d6SSatish Balay rend = rowners[rank+1]; 98957b952d6SSatish Balay 99057b952d6SSatish Balay /* distribute row lengths to all processors */ 99157b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 99257b952d6SSatish Balay if (!rank) { 99357b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 99457b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 99557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 99657b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 997cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 998cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 99957b952d6SSatish Balay PetscFree(sndcounts); 100057b952d6SSatish Balay } 100157b952d6SSatish Balay else { 100257b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 100357b952d6SSatish Balay } 100457b952d6SSatish Balay 100557b952d6SSatish Balay if (!rank) { 100657b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 100757b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 100857b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 100957b952d6SSatish Balay for ( i=0; i<size; i++ ) { 101057b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 101157b952d6SSatish Balay procsnz[i] += rowlengths[j]; 101257b952d6SSatish Balay } 101357b952d6SSatish Balay } 101457b952d6SSatish Balay PetscFree(rowlengths); 101557b952d6SSatish Balay 101657b952d6SSatish Balay /* determine max buffer needed and allocate it */ 101757b952d6SSatish Balay maxnz = 0; 101857b952d6SSatish Balay for ( i=0; i<size; i++ ) { 101957b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 102057b952d6SSatish Balay } 102157b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 102257b952d6SSatish Balay 102357b952d6SSatish Balay /* read in my part of the matrix column indices */ 102457b952d6SSatish Balay nz = procsnz[0]; 102557b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 102657b952d6SSatish Balay mycols = ibuf; 1027cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 102857b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1029cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1030cee3aa6bSSatish Balay 103157b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 103257b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 103357b952d6SSatish Balay nz = procsnz[i]; 103457b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 103557b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 103657b952d6SSatish Balay } 103757b952d6SSatish Balay /* read in the stuff for the last proc */ 103857b952d6SSatish Balay if ( size != 1 ) { 103957b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 104057b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 104157b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1042cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 104357b952d6SSatish Balay } 104457b952d6SSatish Balay PetscFree(cols); 104557b952d6SSatish Balay } 104657b952d6SSatish Balay else { 104757b952d6SSatish Balay /* determine buffer space needed for message */ 104857b952d6SSatish Balay nz = 0; 104957b952d6SSatish Balay for ( i=0; i<m; i++ ) { 105057b952d6SSatish Balay nz += locrowlens[i]; 105157b952d6SSatish Balay } 105257b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 105357b952d6SSatish Balay mycols = ibuf; 105457b952d6SSatish Balay /* receive message of column indices*/ 105557b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 105657b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1057cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 105857b952d6SSatish Balay } 105957b952d6SSatish Balay 106057b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1061cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1062cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 106357b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1064cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 106557b952d6SSatish Balay masked1 = mask + Mbs; 106657b952d6SSatish Balay masked2 = masked1 + Mbs; 106757b952d6SSatish Balay rowcount = 0; nzcount = 0; 106857b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 106957b952d6SSatish Balay dcount = 0; 107057b952d6SSatish Balay odcount = 0; 107157b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 107257b952d6SSatish Balay kmax = locrowlens[rowcount]; 107357b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 107457b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 107557b952d6SSatish Balay if (!mask[tmp]) { 107657b952d6SSatish Balay mask[tmp] = 1; 107757b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 107857b952d6SSatish Balay else masked1[dcount++] = tmp; 107957b952d6SSatish Balay } 108057b952d6SSatish Balay } 108157b952d6SSatish Balay rowcount++; 108257b952d6SSatish Balay } 1083cee3aa6bSSatish Balay 108457b952d6SSatish Balay dlens[i] = dcount; 108557b952d6SSatish Balay odlens[i] = odcount; 1086cee3aa6bSSatish Balay 108757b952d6SSatish Balay /* zero out the mask elements we set */ 108857b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 108957b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 109057b952d6SSatish Balay } 1091cee3aa6bSSatish Balay 109257b952d6SSatish Balay /* create our matrix */ 109357b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 109457b952d6SSatish Balay A = *newmat; 10956d4a8577SBarry Smith MatSetOption(A,MAT_COLUMNS_SORTED); 109657b952d6SSatish Balay 109757b952d6SSatish Balay if (!rank) { 109857b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 109957b952d6SSatish Balay /* read in my part of the matrix numerical values */ 110057b952d6SSatish Balay nz = procsnz[0]; 110157b952d6SSatish Balay vals = buf; 1102cee3aa6bSSatish Balay mycols = ibuf; 1103cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 110457b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1105cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 110657b952d6SSatish Balay /* insert into matrix */ 110757b952d6SSatish Balay jj = rstart*bs; 110857b952d6SSatish Balay for ( i=0; i<m; i++ ) { 110957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 111057b952d6SSatish Balay mycols += locrowlens[i]; 111157b952d6SSatish Balay vals += locrowlens[i]; 111257b952d6SSatish Balay jj++; 111357b952d6SSatish Balay } 111457b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 111557b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 111657b952d6SSatish Balay nz = procsnz[i]; 111757b952d6SSatish Balay vals = buf; 111857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 111957b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 112057b952d6SSatish Balay } 112157b952d6SSatish Balay /* the last proc */ 112257b952d6SSatish Balay if ( size != 1 ){ 112357b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1124cee3aa6bSSatish Balay vals = buf; 112557b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 112657b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1127cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 112857b952d6SSatish Balay } 112957b952d6SSatish Balay PetscFree(procsnz); 113057b952d6SSatish Balay } 113157b952d6SSatish Balay else { 113257b952d6SSatish Balay /* receive numeric values */ 113357b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 113457b952d6SSatish Balay 113557b952d6SSatish Balay /* receive message of values*/ 113657b952d6SSatish Balay vals = buf; 1137cee3aa6bSSatish Balay mycols = ibuf; 113857b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 113957b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1140cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 114157b952d6SSatish Balay 114257b952d6SSatish Balay /* insert into matrix */ 114357b952d6SSatish Balay jj = rstart*bs; 1144cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 114557b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 114657b952d6SSatish Balay mycols += locrowlens[i]; 114757b952d6SSatish Balay vals += locrowlens[i]; 114857b952d6SSatish Balay jj++; 114957b952d6SSatish Balay } 115057b952d6SSatish Balay } 115157b952d6SSatish Balay PetscFree(locrowlens); 115257b952d6SSatish Balay PetscFree(buf); 115357b952d6SSatish Balay PetscFree(ibuf); 115457b952d6SSatish Balay PetscFree(rowners); 115557b952d6SSatish Balay PetscFree(dlens); 1156cee3aa6bSSatish Balay PetscFree(mask); 11576d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 11586d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 115957b952d6SSatish Balay return 0; 116057b952d6SSatish Balay } 116157b952d6SSatish Balay 116257b952d6SSatish Balay 1163