179bdfe76SSatish Balay #ifndef lint 2*bd16c2feSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.7 1996/07/01 23:20:26 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 1357b952d6SSatish Balay 1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1657b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1757b952d6SSatish Balay length of colmap equals the global matrix length. 1857b952d6SSatish Balay */ 1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2057b952d6SSatish Balay { 2157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 2357b952d6SSatish Balay int nbs = B->nbs,i; 2457b952d6SSatish Balay 2557b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 2657b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 2757b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 2857b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 2957b952d6SSatish Balay return 0; 3057b952d6SSatish Balay } 3157b952d6SSatish Balay 3257b952d6SSatish Balay 33acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay int ierr; 3757b952d6SSatish Balay if (baij->size == 1) { 3857b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 3957b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 4057b952d6SSatish Balay return 0; 4157b952d6SSatish Balay } 4257b952d6SSatish Balay 4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4457b952d6SSatish Balay { 4557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4657b952d6SSatish Balay Scalar value; 4757b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 4857b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 4957b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 5057b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 5157b952d6SSatish Balay 5257b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 5357b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 5457b952d6SSatish Balay } 5557b952d6SSatish Balay baij->insertmode = addv; 5657b952d6SSatish Balay rstart_orig = rstart*bs; 5757b952d6SSatish Balay rend_orig = rend*bs; 5857b952d6SSatish Balay cstart_orig = cstart*bs; 5957b952d6SSatish Balay cend_orig = cend*bs; 6057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 6157b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 6257b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 6357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 6457b952d6SSatish Balay row = im[i] - rstart_orig; 6557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 6657b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 6757b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 6857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 6957b952d6SSatish Balay col = in[j] - cstart_orig; 7057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 7157b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 7257b952d6SSatish Balay } 7357b952d6SSatish Balay else { 7457b952d6SSatish Balay if (mat->was_assembled) { 7557b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 7657b952d6SSatish Balay col = baij->colmap[in[j]]; 7757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 7857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 7957b952d6SSatish Balay col = in[j]; 8057b952d6SSatish Balay } 8157b952d6SSatish Balay } 8257b952d6SSatish Balay else col = in[j]; 8357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8457b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8557b952d6SSatish Balay } 8657b952d6SSatish Balay } 8757b952d6SSatish Balay } 8857b952d6SSatish Balay else { 8957b952d6SSatish Balay if (roworiented) { 9057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 9157b952d6SSatish Balay } 9257b952d6SSatish Balay else { 9357b952d6SSatish Balay row = im[i]; 9457b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 9657b952d6SSatish Balay } 9757b952d6SSatish Balay } 9857b952d6SSatish Balay } 9957b952d6SSatish Balay } 10057b952d6SSatish Balay return 0; 10157b952d6SSatish Balay } 10257b952d6SSatish Balay 103d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 104d6de1c52SSatish Balay { 105d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 106d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 107d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 108d6de1c52SSatish Balay 109d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 110d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 111d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 112d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 113d6de1c52SSatish Balay row = idxm[i] - bsrstart; 114d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 115d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 116d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 117d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 118d6de1c52SSatish Balay col = idxn[j] - bscstart; 119d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 120d6de1c52SSatish Balay } 121d6de1c52SSatish Balay else { 122acdf5bf4SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 123d9d09a02SSatish Balay if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0; 124d9d09a02SSatish Balay else { 125d9d09a02SSatish Balay col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs; 126d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 127d6de1c52SSatish Balay } 128d6de1c52SSatish Balay } 129d6de1c52SSatish Balay } 130d9d09a02SSatish Balay } 131d6de1c52SSatish Balay else { 132d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 133d6de1c52SSatish Balay } 134d6de1c52SSatish Balay } 135d6de1c52SSatish Balay return 0; 136d6de1c52SSatish Balay } 137d6de1c52SSatish Balay 138d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 139d6de1c52SSatish Balay { 140d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 141d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 142acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 143d6de1c52SSatish Balay double sum = 0.0; 144d6de1c52SSatish Balay Scalar *v; 145d6de1c52SSatish Balay 146d6de1c52SSatish Balay if (baij->size == 1) { 147d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 148d6de1c52SSatish Balay } else { 149d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 150d6de1c52SSatish Balay v = amat->a; 151d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 152d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 153d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 154d6de1c52SSatish Balay #else 155d6de1c52SSatish Balay sum += (*v)*(*v); v++; 156d6de1c52SSatish Balay #endif 157d6de1c52SSatish Balay } 158d6de1c52SSatish Balay v = bmat->a; 159d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 160d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 161d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 162d6de1c52SSatish Balay #else 163d6de1c52SSatish Balay sum += (*v)*(*v); v++; 164d6de1c52SSatish Balay #endif 165d6de1c52SSatish Balay } 166d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 167d6de1c52SSatish Balay *norm = sqrt(*norm); 168d6de1c52SSatish Balay } 169acdf5bf4SSatish Balay else 170acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 171d6de1c52SSatish Balay } 172d6de1c52SSatish Balay return 0; 173d6de1c52SSatish Balay } 17457b952d6SSatish Balay 17557b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 17657b952d6SSatish Balay { 17757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17857b952d6SSatish Balay MPI_Comm comm = mat->comm; 17957b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 18057b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 18157b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 18257b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 18357b952d6SSatish Balay InsertMode addv; 18457b952d6SSatish Balay Scalar *rvalues,*svalues; 18557b952d6SSatish Balay 18657b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 18757b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 18857b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 18957b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 19057b952d6SSatish Balay } 19157b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 19257b952d6SSatish Balay 19357b952d6SSatish Balay /* first count number of contributors to each processor */ 19457b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 19557b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 19657b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 19757b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 19857b952d6SSatish Balay idx = baij->stash.idx[i]; 19957b952d6SSatish Balay for ( j=0; j<size; j++ ) { 20057b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 20157b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 20257b952d6SSatish Balay } 20357b952d6SSatish Balay } 20457b952d6SSatish Balay } 20557b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 20657b952d6SSatish Balay 20757b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 20857b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 20957b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 21057b952d6SSatish Balay nreceives = work[rank]; 21157b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 21257b952d6SSatish Balay nmax = work[rank]; 21357b952d6SSatish Balay PetscFree(work); 21457b952d6SSatish Balay 21557b952d6SSatish Balay /* post receives: 21657b952d6SSatish Balay 1) each message will consist of ordered pairs 21757b952d6SSatish Balay (global index,value) we store the global index as a double 21857b952d6SSatish Balay to simplify the message passing. 21957b952d6SSatish Balay 2) since we don't know how long each individual message is we 22057b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 22157b952d6SSatish Balay this is a lot of wasted space. 22257b952d6SSatish Balay 22357b952d6SSatish Balay 22457b952d6SSatish Balay This could be done better. 22557b952d6SSatish Balay */ 22657b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 22757b952d6SSatish Balay CHKPTRQ(rvalues); 22857b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 22957b952d6SSatish Balay CHKPTRQ(recv_waits); 23057b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 23157b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 23257b952d6SSatish Balay comm,recv_waits+i); 23357b952d6SSatish Balay } 23457b952d6SSatish Balay 23557b952d6SSatish Balay /* do sends: 23657b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 23757b952d6SSatish Balay the ith processor 23857b952d6SSatish Balay */ 23957b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 24057b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 24157b952d6SSatish Balay CHKPTRQ(send_waits); 24257b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 24357b952d6SSatish Balay starts[0] = 0; 24457b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 24557b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 24657b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 24757b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 24857b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 24957b952d6SSatish Balay } 25057b952d6SSatish Balay PetscFree(owner); 25157b952d6SSatish Balay starts[0] = 0; 25257b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 25357b952d6SSatish Balay count = 0; 25457b952d6SSatish Balay for ( i=0; i<size; i++ ) { 25557b952d6SSatish Balay if (procs[i]) { 25657b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 25757b952d6SSatish Balay comm,send_waits+count++); 25857b952d6SSatish Balay } 25957b952d6SSatish Balay } 26057b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 26157b952d6SSatish Balay 26257b952d6SSatish Balay /* Free cache space */ 26357b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 26457b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 26557b952d6SSatish Balay 26657b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 26757b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 26857b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 26957b952d6SSatish Balay baij->rmax = nmax; 27057b952d6SSatish Balay 27157b952d6SSatish Balay return 0; 27257b952d6SSatish Balay } 27357b952d6SSatish Balay 27457b952d6SSatish Balay 27557b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 27657b952d6SSatish Balay { 27757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27857b952d6SSatish Balay MPI_Status *send_status,recv_status; 27957b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 28057b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 28157b952d6SSatish Balay Scalar *values,val; 28257b952d6SSatish Balay InsertMode addv = baij->insertmode; 28357b952d6SSatish Balay 28457b952d6SSatish Balay /* wait on receives */ 28557b952d6SSatish Balay while (count) { 28657b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 28757b952d6SSatish Balay /* unpack receives into our local space */ 28857b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 28957b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 29057b952d6SSatish Balay n = n/3; 29157b952d6SSatish Balay for ( i=0; i<n; i++ ) { 29257b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 29357b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 29457b952d6SSatish Balay val = values[3*i+2]; 29557b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 29657b952d6SSatish Balay col -= baij->cstart*bs; 29757b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 29857b952d6SSatish Balay } 29957b952d6SSatish Balay else { 30057b952d6SSatish Balay if (mat->was_assembled) { 30157b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 30257b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 30357b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 30457b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 30557b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 30657b952d6SSatish Balay } 30757b952d6SSatish Balay } 30857b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 30957b952d6SSatish Balay } 31057b952d6SSatish Balay } 31157b952d6SSatish Balay count--; 31257b952d6SSatish Balay } 31357b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 31457b952d6SSatish Balay 31557b952d6SSatish Balay /* wait on sends */ 31657b952d6SSatish Balay if (baij->nsends) { 31757b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 31857b952d6SSatish Balay CHKPTRQ(send_status); 31957b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 32057b952d6SSatish Balay PetscFree(send_status); 32157b952d6SSatish Balay } 32257b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 32357b952d6SSatish Balay 32457b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 32557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 32657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 32757b952d6SSatish Balay 32857b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 32957b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 33057b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 33157b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 33257b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 33357b952d6SSatish Balay } 33457b952d6SSatish Balay 33557b952d6SSatish Balay if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 33657b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 33757b952d6SSatish Balay } 33857b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 33957b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 34057b952d6SSatish Balay 34157b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 34257b952d6SSatish Balay return 0; 34357b952d6SSatish Balay } 34457b952d6SSatish Balay 34557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 34657b952d6SSatish Balay { 34757b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 34857b952d6SSatish Balay int ierr; 34957b952d6SSatish Balay 35057b952d6SSatish Balay if (baij->size == 1) { 35157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 35257b952d6SSatish Balay } 35357b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 35457b952d6SSatish Balay return 0; 35557b952d6SSatish Balay } 35657b952d6SSatish Balay 35757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 35857b952d6SSatish Balay { 35957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 360cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 36157b952d6SSatish Balay FILE *fd; 36257b952d6SSatish Balay ViewerType vtype; 36357b952d6SSatish Balay 36457b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36557b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 36657b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 36757b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 36857b952d6SSatish Balay int nz, nzalloc, mem; 36957b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 37057b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 37157b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 37257b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 37357b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 37457b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 37557b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 37657b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 37757b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 37857b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 37957b952d6SSatish Balay fflush(fd); 38057b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 38157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 38257b952d6SSatish Balay return 0; 38357b952d6SSatish Balay } 38457b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 38557b952d6SSatish Balay return 0; 38657b952d6SSatish Balay } 38757b952d6SSatish Balay } 38857b952d6SSatish Balay 38957b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 39057b952d6SSatish Balay Draw draw; 39157b952d6SSatish Balay PetscTruth isnull; 39257b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 39357b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 39457b952d6SSatish Balay } 39557b952d6SSatish Balay 39657b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 39757b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 39857b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 39957b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 40057b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 40157b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 40257b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 40357b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 40457b952d6SSatish Balay fflush(fd); 40557b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 40657b952d6SSatish Balay } 40757b952d6SSatish Balay else { 40857b952d6SSatish Balay int size = baij->size; 40957b952d6SSatish Balay rank = baij->rank; 41057b952d6SSatish Balay if (size == 1) { 41157b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 41257b952d6SSatish Balay } 41357b952d6SSatish Balay else { 41457b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 41557b952d6SSatish Balay Mat A; 41657b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 41757b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 41857b952d6SSatish Balay int mbs=baij->mbs; 41957b952d6SSatish Balay Scalar *a; 42057b952d6SSatish Balay 42157b952d6SSatish Balay if (!rank) { 422cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42357b952d6SSatish Balay CHKERRQ(ierr); 42457b952d6SSatish Balay } 42557b952d6SSatish Balay else { 426cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42757b952d6SSatish Balay CHKERRQ(ierr); 42857b952d6SSatish Balay } 42957b952d6SSatish Balay PLogObjectParent(mat,A); 43057b952d6SSatish Balay 43157b952d6SSatish Balay /* copy over the A part */ 43257b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 43357b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 43457b952d6SSatish Balay row = baij->rstart; 43557b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 43657b952d6SSatish Balay 43757b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 43857b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 43957b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 44057b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 44157b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 44257b952d6SSatish Balay for (k=0; k<bs; k++ ) { 443cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 444cee3aa6bSSatish Balay col++; a += bs; 44557b952d6SSatish Balay } 44657b952d6SSatish Balay } 44757b952d6SSatish Balay } 44857b952d6SSatish Balay /* copy over the B part */ 44957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 45057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 45157b952d6SSatish Balay row = baij->rstart*bs; 45257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 45357b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 45457b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 45557b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 45657b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 45757b952d6SSatish Balay for (k=0; k<bs; k++ ) { 458cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 459cee3aa6bSSatish Balay col++; a += bs; 46057b952d6SSatish Balay } 46157b952d6SSatish Balay } 46257b952d6SSatish Balay } 46357b952d6SSatish Balay PetscFree(rvals); 46457b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 46557b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 46657b952d6SSatish Balay if (!rank) { 46757b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 46857b952d6SSatish Balay } 46957b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 47057b952d6SSatish Balay } 47157b952d6SSatish Balay } 47257b952d6SSatish Balay return 0; 47357b952d6SSatish Balay } 47457b952d6SSatish Balay 47557b952d6SSatish Balay 47657b952d6SSatish Balay 47757b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 47857b952d6SSatish Balay { 47957b952d6SSatish Balay Mat mat = (Mat) obj; 48057b952d6SSatish Balay int ierr; 48157b952d6SSatish Balay ViewerType vtype; 48257b952d6SSatish Balay 48357b952d6SSatish Balay if (!viewer) { 48457b952d6SSatish Balay viewer = STDOUT_VIEWER_SELF; 48557b952d6SSatish Balay } 48657b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 48757b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 48857b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 48957b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 49057b952d6SSatish Balay } 49157b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 49257b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 49357b952d6SSatish Balay } 49457b952d6SSatish Balay return 0; 49557b952d6SSatish Balay } 49657b952d6SSatish Balay 49779bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 49879bdfe76SSatish Balay { 49979bdfe76SSatish Balay Mat mat = (Mat) obj; 50079bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 50179bdfe76SSatish Balay int ierr; 50279bdfe76SSatish Balay 50379bdfe76SSatish Balay #if defined(PETSC_LOG) 50479bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 50579bdfe76SSatish Balay #endif 50679bdfe76SSatish Balay 50779bdfe76SSatish Balay PetscFree(baij->rowners); 50879bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 50979bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 51079bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 51179bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 51279bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 51379bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 51479bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 51579bdfe76SSatish Balay PetscFree(baij); 51679bdfe76SSatish Balay PLogObjectDestroy(mat); 51779bdfe76SSatish Balay PetscHeaderDestroy(mat); 51879bdfe76SSatish Balay return 0; 51979bdfe76SSatish Balay } 52079bdfe76SSatish Balay 521cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 522cee3aa6bSSatish Balay { 523cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 524cee3aa6bSSatish Balay int ierr; 525cee3aa6bSSatish Balay 526cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 527cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 528cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 529cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 530cee3aa6bSSatish Balay return 0; 531cee3aa6bSSatish Balay } 532cee3aa6bSSatish Balay 533cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 534cee3aa6bSSatish Balay { 535cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 536cee3aa6bSSatish Balay int ierr; 537cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 538cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 539cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 540cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 541cee3aa6bSSatish Balay return 0; 542cee3aa6bSSatish Balay } 543cee3aa6bSSatish Balay 544cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 545cee3aa6bSSatish Balay { 546cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 547cee3aa6bSSatish Balay int ierr; 548cee3aa6bSSatish Balay 549cee3aa6bSSatish Balay /* do nondiagonal part */ 550cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 551cee3aa6bSSatish Balay /* send it on its way */ 552cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 553cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 554cee3aa6bSSatish Balay /* do local part */ 555cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 556cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 557cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 558cee3aa6bSSatish Balay /* but is not perhaps always true. */ 559cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 560cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 561cee3aa6bSSatish Balay return 0; 562cee3aa6bSSatish Balay } 563cee3aa6bSSatish Balay 564cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 565cee3aa6bSSatish Balay { 566cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 567cee3aa6bSSatish Balay int ierr; 568cee3aa6bSSatish Balay 569cee3aa6bSSatish Balay /* do nondiagonal part */ 570cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 571cee3aa6bSSatish Balay /* send it on its way */ 572cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 573cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 574cee3aa6bSSatish Balay /* do local part */ 575cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 576cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 577cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 578cee3aa6bSSatish Balay /* but is not perhaps always true. */ 579cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 580cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 581cee3aa6bSSatish Balay return 0; 582cee3aa6bSSatish Balay } 583cee3aa6bSSatish Balay 584cee3aa6bSSatish Balay /* 585cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 586cee3aa6bSSatish Balay diagonal block 587cee3aa6bSSatish Balay */ 588cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 589cee3aa6bSSatish Balay { 590cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 591cee3aa6bSSatish Balay if (a->M != a->N) 592cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 593cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 594cee3aa6bSSatish Balay } 595cee3aa6bSSatish Balay 596cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 597cee3aa6bSSatish Balay { 598cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 599cee3aa6bSSatish Balay int ierr; 600cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 601cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 602cee3aa6bSSatish Balay return 0; 603cee3aa6bSSatish Balay } 60457b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 60557b952d6SSatish Balay { 60657b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 60757b952d6SSatish Balay *m = mat->M; *n = mat->N; 60857b952d6SSatish Balay return 0; 60957b952d6SSatish Balay } 61057b952d6SSatish Balay 61157b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 61257b952d6SSatish Balay { 61357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 61457b952d6SSatish Balay *m = mat->m; *n = mat->N; 61557b952d6SSatish Balay return 0; 61657b952d6SSatish Balay } 61757b952d6SSatish Balay 61857b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 61957b952d6SSatish Balay { 62057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 62157b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 62257b952d6SSatish Balay return 0; 62357b952d6SSatish Balay } 62457b952d6SSatish Balay 625acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 626acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 627acdf5bf4SSatish Balay 628acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 629acdf5bf4SSatish Balay { 630acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 631acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 632acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 633d9d09a02SSatish Balay int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 634d9d09a02SSatish Balay int *cmap, *idx_p,cstart = mat->cstart; 635acdf5bf4SSatish Balay 636acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 637acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 638acdf5bf4SSatish Balay 639acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 640acdf5bf4SSatish Balay /* 641acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 642acdf5bf4SSatish Balay */ 643acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 644*bd16c2feSSatish Balay int max = 1,mbs = mat->mbs,tmp; 645*bd16c2feSSatish Balay for ( i=0; i<mbs; i++ ) { 646acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 647acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 648acdf5bf4SSatish Balay } 649acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 650acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 651acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 652acdf5bf4SSatish Balay } 653acdf5bf4SSatish Balay 654acdf5bf4SSatish Balay 655d9d09a02SSatish Balay if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 656d9d09a02SSatish Balay lrow = row - brstart; 657acdf5bf4SSatish Balay 658acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 659acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 660acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 661acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 662acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 663acdf5bf4SSatish Balay nztot = nzA + nzB; 664acdf5bf4SSatish Balay 665acdf5bf4SSatish Balay cmap = mat->garray; 666acdf5bf4SSatish Balay if (v || idx) { 667acdf5bf4SSatish Balay if (nztot) { 668acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 669acdf5bf4SSatish Balay int imark = -1; 670acdf5bf4SSatish Balay if (v) { 671acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 672acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 673d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 674acdf5bf4SSatish Balay else break; 675acdf5bf4SSatish Balay } 676acdf5bf4SSatish Balay imark = i; 677acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 678acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 679acdf5bf4SSatish Balay } 680acdf5bf4SSatish Balay if (idx) { 681acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 682acdf5bf4SSatish Balay if (imark > -1) { 683acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 684*bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 685acdf5bf4SSatish Balay } 686acdf5bf4SSatish Balay } else { 687acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 688d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) 689d9d09a02SSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 690acdf5bf4SSatish Balay else break; 691acdf5bf4SSatish Balay } 692acdf5bf4SSatish Balay imark = i; 693acdf5bf4SSatish Balay } 694d9d09a02SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 695d9d09a02SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 696acdf5bf4SSatish Balay } 697acdf5bf4SSatish Balay } 698acdf5bf4SSatish Balay else {*idx = 0; *v=0;} 699acdf5bf4SSatish Balay } 700acdf5bf4SSatish Balay *nz = nztot; 701acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 702acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 703acdf5bf4SSatish Balay return 0; 704acdf5bf4SSatish Balay } 705acdf5bf4SSatish Balay 706acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 707acdf5bf4SSatish Balay { 708acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 709acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 710acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 711acdf5bf4SSatish Balay } 712acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 713acdf5bf4SSatish Balay return 0; 714acdf5bf4SSatish Balay } 715acdf5bf4SSatish Balay 71679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 71779bdfe76SSatish Balay static struct _MatOps MatOps = { 718*bd16c2feSSatish Balay MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ, 719cee3aa6bSSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 72079bdfe76SSatish Balay 0,0,0,0, 72179bdfe76SSatish Balay 0,0,0,0, 722acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 72357b952d6SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 72457b952d6SSatish Balay 0,0,MatGetReordering_MPIBAIJ,0, 72557b952d6SSatish Balay 0,0,0,MatGetSize_MPIBAIJ, 72657b952d6SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 72779bdfe76SSatish Balay 0,0,0,0, 72879bdfe76SSatish Balay 0,0,0,0, 72979bdfe76SSatish Balay 0,0,0,0, 730acdf5bf4SSatish Balay 0,MatGetValues_MPIBAIJ,0,0, 731cee3aa6bSSatish Balay MatScale_MPIBAIJ,0,0}; 73279bdfe76SSatish Balay 73379bdfe76SSatish Balay 73479bdfe76SSatish Balay /*@C 73579bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 73679bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 73779bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 73879bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 73979bdfe76SSatish Balay performance can be increased by more than a factor of 50. 74079bdfe76SSatish Balay 74179bdfe76SSatish Balay Input Parameters: 74279bdfe76SSatish Balay . comm - MPI communicator 74379bdfe76SSatish Balay . bs - size of blockk 74479bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 74579bdfe76SSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated 74679bdfe76SSatish Balay if N is given) 74779bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 74879bdfe76SSatish Balay . N - number of global columns (or PETSC_DECIDE to have calculated 74979bdfe76SSatish Balay if n is given) 75079bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 75179bdfe76SSatish Balay submatrix (same for all local rows) 75279bdfe76SSatish Balay . d_nzz - number of block nonzeros per block row in diagonal portion of local 75379bdfe76SSatish Balay submatrix or null (possibly different for each row). You must leave 75479bdfe76SSatish Balay room for the diagonal entry even if it is zero. 75579bdfe76SSatish Balay . o_nz - number of block nonzeros per block row in off-diagonal portion of local 75679bdfe76SSatish Balay submatrix (same for all local rows). 75779bdfe76SSatish Balay . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 75879bdfe76SSatish Balay submatrix or null (possibly different for each row). 75979bdfe76SSatish Balay 76079bdfe76SSatish Balay Output Parameter: 76179bdfe76SSatish Balay . A - the matrix 76279bdfe76SSatish Balay 76379bdfe76SSatish Balay Notes: 76479bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 76579bdfe76SSatish Balay (possibly both). 76679bdfe76SSatish Balay 76779bdfe76SSatish Balay Storage Information: 76879bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 76979bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 77079bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 77179bdfe76SSatish Balay local matrix (a rectangular submatrix). 77279bdfe76SSatish Balay 77379bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 77479bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 77579bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 77679bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 77779bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 77879bdfe76SSatish Balay 77979bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 78079bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 78179bdfe76SSatish Balay 78279bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 78379bdfe76SSatish Balay $ ------------------- 78479bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 78579bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 78679bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 78779bdfe76SSatish Balay $ ------------------- 78879bdfe76SSatish Balay $ 78979bdfe76SSatish Balay 79079bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 79179bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 79279bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 79357b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 79479bdfe76SSatish Balay 79579bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 79679bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 79779bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 79879bdfe76SSatish Balay one expects d_nz >> o_nz. For additional details, see the users manual 79979bdfe76SSatish Balay chapter on matrices and the file $(PETSC_DIR)/Performance. 80079bdfe76SSatish Balay 80179bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel 80279bdfe76SSatish Balay 80379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 80479bdfe76SSatish Balay @*/ 80579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 80679bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 80779bdfe76SSatish Balay { 80879bdfe76SSatish Balay Mat B; 80979bdfe76SSatish Balay Mat_MPIBAIJ *b; 810cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 81179bdfe76SSatish Balay 81279bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 81379bdfe76SSatish Balay *A = 0; 81479bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 81579bdfe76SSatish Balay PLogObjectCreate(B); 81679bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 81779bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 81879bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 81979bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 82079bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 82157b952d6SSatish Balay 82279bdfe76SSatish Balay B->factor = 0; 82379bdfe76SSatish Balay B->assembled = PETSC_FALSE; 82479bdfe76SSatish Balay 82579bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 82679bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 82779bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 82879bdfe76SSatish Balay 82979bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 83057b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 831cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 832cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 833cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 834cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 83579bdfe76SSatish Balay 83679bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 83779bdfe76SSatish Balay work[0] = m; work[1] = n; 83879bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 83979bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 84079bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 84179bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 84279bdfe76SSatish Balay } 84379bdfe76SSatish Balay if (m == PETSC_DECIDE) { 84479bdfe76SSatish Balay Mbs = M/bs; 84579bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 84679bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 84779bdfe76SSatish Balay m = mbs*bs; 84879bdfe76SSatish Balay } 84979bdfe76SSatish Balay if (n == PETSC_DECIDE) { 85079bdfe76SSatish Balay Nbs = N/bs; 85179bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 85279bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 85379bdfe76SSatish Balay n = nbs*bs; 85479bdfe76SSatish Balay } 855cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 85679bdfe76SSatish Balay 85779bdfe76SSatish Balay b->m = m; B->m = m; 85879bdfe76SSatish Balay b->n = n; B->n = n; 85979bdfe76SSatish Balay b->N = N; B->N = N; 86079bdfe76SSatish Balay b->M = M; B->M = M; 86179bdfe76SSatish Balay b->bs = bs; 86279bdfe76SSatish Balay b->bs2 = bs*bs; 86379bdfe76SSatish Balay b->mbs = mbs; 86479bdfe76SSatish Balay b->nbs = nbs; 86579bdfe76SSatish Balay b->Mbs = Mbs; 86679bdfe76SSatish Balay b->Nbs = Nbs; 86779bdfe76SSatish Balay 86879bdfe76SSatish Balay /* build local table of row and column ownerships */ 86979bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 87079bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 87179bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 87279bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 87379bdfe76SSatish Balay b->rowners[0] = 0; 87479bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 87579bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 87679bdfe76SSatish Balay } 87779bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 87879bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 87979bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 88079bdfe76SSatish Balay b->cowners[0] = 0; 88179bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 88279bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 88379bdfe76SSatish Balay } 88479bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 88579bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 88679bdfe76SSatish Balay 88779bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 88879bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 88979bdfe76SSatish Balay PLogObjectParent(B,b->A); 89079bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 89179bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 89279bdfe76SSatish Balay PLogObjectParent(B,b->B); 89379bdfe76SSatish Balay 89479bdfe76SSatish Balay /* build cache for off array entries formed */ 89579bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 89679bdfe76SSatish Balay b->colmap = 0; 89779bdfe76SSatish Balay b->garray = 0; 89879bdfe76SSatish Balay b->roworiented = 1; 89979bdfe76SSatish Balay 90079bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 90179bdfe76SSatish Balay b->lvec = 0; 90279bdfe76SSatish Balay b->Mvctx = 0; 90379bdfe76SSatish Balay 90479bdfe76SSatish Balay /* stuff for MatGetRow() */ 90579bdfe76SSatish Balay b->rowindices = 0; 90679bdfe76SSatish Balay b->rowvalues = 0; 90779bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 90879bdfe76SSatish Balay 90979bdfe76SSatish Balay *A = B; 91079bdfe76SSatish Balay return 0; 91179bdfe76SSatish Balay } 91257b952d6SSatish Balay 91357b952d6SSatish Balay #include "sys.h" 91457b952d6SSatish Balay 91557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 91657b952d6SSatish Balay { 91757b952d6SSatish Balay Mat A; 91857b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 91957b952d6SSatish Balay Scalar *vals,*buf; 92057b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 92157b952d6SSatish Balay MPI_Status status; 922cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 92357b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 92457b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 92557b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 92657b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 92757b952d6SSatish Balay 92857b952d6SSatish Balay 92957b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 93057b952d6SSatish Balay bs2 = bs*bs; 93157b952d6SSatish Balay 93257b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 93357b952d6SSatish Balay if (!rank) { 93457b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 93557b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 936cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 93757b952d6SSatish Balay } 93857b952d6SSatish Balay 93957b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 94057b952d6SSatish Balay M = header[1]; N = header[2]; 94157b952d6SSatish Balay 94257b952d6SSatish Balay 94357b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 94457b952d6SSatish Balay 94557b952d6SSatish Balay /* 94657b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 94757b952d6SSatish Balay divisible by the blocksize 94857b952d6SSatish Balay */ 94957b952d6SSatish Balay Mbs = M/bs; 95057b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 95157b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 95257b952d6SSatish Balay else Mbs++; 95357b952d6SSatish Balay if (extra_rows &&!rank) { 95457b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 95557b952d6SSatish Balay } 95657b952d6SSatish Balay /* determine ownership of all rows */ 95757b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 95857b952d6SSatish Balay m = mbs * bs; 959cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 960cee3aa6bSSatish Balay browners = rowners + size + 1; 96157b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 96257b952d6SSatish Balay rowners[0] = 0; 963cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 964cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 96557b952d6SSatish Balay rstart = rowners[rank]; 96657b952d6SSatish Balay rend = rowners[rank+1]; 96757b952d6SSatish Balay 96857b952d6SSatish Balay /* distribute row lengths to all processors */ 96957b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 97057b952d6SSatish Balay if (!rank) { 97157b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 97257b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 97357b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 97457b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 975cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 976cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 97757b952d6SSatish Balay PetscFree(sndcounts); 97857b952d6SSatish Balay } 97957b952d6SSatish Balay else { 98057b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 98157b952d6SSatish Balay } 98257b952d6SSatish Balay 98357b952d6SSatish Balay if (!rank) { 98457b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 98557b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 98657b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 98757b952d6SSatish Balay for ( i=0; i<size; i++ ) { 98857b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 98957b952d6SSatish Balay procsnz[i] += rowlengths[j]; 99057b952d6SSatish Balay } 99157b952d6SSatish Balay } 99257b952d6SSatish Balay PetscFree(rowlengths); 99357b952d6SSatish Balay 99457b952d6SSatish Balay /* determine max buffer needed and allocate it */ 99557b952d6SSatish Balay maxnz = 0; 99657b952d6SSatish Balay for ( i=0; i<size; i++ ) { 99757b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 99857b952d6SSatish Balay } 99957b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 100057b952d6SSatish Balay 100157b952d6SSatish Balay /* read in my part of the matrix column indices */ 100257b952d6SSatish Balay nz = procsnz[0]; 100357b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 100457b952d6SSatish Balay mycols = ibuf; 1005cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 100657b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1007cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1008cee3aa6bSSatish Balay 100957b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 101057b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 101157b952d6SSatish Balay nz = procsnz[i]; 101257b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 101357b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 101457b952d6SSatish Balay } 101557b952d6SSatish Balay /* read in the stuff for the last proc */ 101657b952d6SSatish Balay if ( size != 1 ) { 101757b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 101857b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 101957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1020cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 102157b952d6SSatish Balay } 102257b952d6SSatish Balay PetscFree(cols); 102357b952d6SSatish Balay } 102457b952d6SSatish Balay else { 102557b952d6SSatish Balay /* determine buffer space needed for message */ 102657b952d6SSatish Balay nz = 0; 102757b952d6SSatish Balay for ( i=0; i<m; i++ ) { 102857b952d6SSatish Balay nz += locrowlens[i]; 102957b952d6SSatish Balay } 103057b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 103157b952d6SSatish Balay mycols = ibuf; 103257b952d6SSatish Balay /* receive message of column indices*/ 103357b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 103457b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1035cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 103657b952d6SSatish Balay } 103757b952d6SSatish Balay 103857b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1039cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1040cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 104157b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1042cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 104357b952d6SSatish Balay masked1 = mask + Mbs; 104457b952d6SSatish Balay masked2 = masked1 + Mbs; 104557b952d6SSatish Balay rowcount = 0; nzcount = 0; 104657b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 104757b952d6SSatish Balay dcount = 0; 104857b952d6SSatish Balay odcount = 0; 104957b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 105057b952d6SSatish Balay kmax = locrowlens[rowcount]; 105157b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 105257b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 105357b952d6SSatish Balay if (!mask[tmp]) { 105457b952d6SSatish Balay mask[tmp] = 1; 105557b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 105657b952d6SSatish Balay else masked1[dcount++] = tmp; 105757b952d6SSatish Balay } 105857b952d6SSatish Balay } 105957b952d6SSatish Balay rowcount++; 106057b952d6SSatish Balay } 1061cee3aa6bSSatish Balay 106257b952d6SSatish Balay dlens[i] = dcount; 106357b952d6SSatish Balay odlens[i] = odcount; 1064cee3aa6bSSatish Balay 106557b952d6SSatish Balay /* zero out the mask elements we set */ 106657b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 106757b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 106857b952d6SSatish Balay } 1069cee3aa6bSSatish Balay 107057b952d6SSatish Balay /* create our matrix */ 107157b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 107257b952d6SSatish Balay A = *newmat; 107357b952d6SSatish Balay MatSetOption(A,COLUMNS_SORTED); 107457b952d6SSatish Balay 107557b952d6SSatish Balay if (!rank) { 107657b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 107757b952d6SSatish Balay /* read in my part of the matrix numerical values */ 107857b952d6SSatish Balay nz = procsnz[0]; 107957b952d6SSatish Balay vals = buf; 1080cee3aa6bSSatish Balay mycols = ibuf; 1081cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 108257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1083cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 108457b952d6SSatish Balay /* insert into matrix */ 108557b952d6SSatish Balay jj = rstart*bs; 108657b952d6SSatish Balay for ( i=0; i<m; i++ ) { 108757b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 108857b952d6SSatish Balay mycols += locrowlens[i]; 108957b952d6SSatish Balay vals += locrowlens[i]; 109057b952d6SSatish Balay jj++; 109157b952d6SSatish Balay } 109257b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 109357b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 109457b952d6SSatish Balay nz = procsnz[i]; 109557b952d6SSatish Balay vals = buf; 109657b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 109757b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 109857b952d6SSatish Balay } 109957b952d6SSatish Balay /* the last proc */ 110057b952d6SSatish Balay if ( size != 1 ){ 110157b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1102cee3aa6bSSatish Balay vals = buf; 110357b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 110457b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1105cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 110657b952d6SSatish Balay } 110757b952d6SSatish Balay PetscFree(procsnz); 110857b952d6SSatish Balay } 110957b952d6SSatish Balay else { 111057b952d6SSatish Balay /* receive numeric values */ 111157b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 111257b952d6SSatish Balay 111357b952d6SSatish Balay /* receive message of values*/ 111457b952d6SSatish Balay vals = buf; 1115cee3aa6bSSatish Balay mycols = ibuf; 111657b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 111757b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1118cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 111957b952d6SSatish Balay 112057b952d6SSatish Balay /* insert into matrix */ 112157b952d6SSatish Balay jj = rstart*bs; 1122cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 112357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 112457b952d6SSatish Balay mycols += locrowlens[i]; 112557b952d6SSatish Balay vals += locrowlens[i]; 112657b952d6SSatish Balay jj++; 112757b952d6SSatish Balay } 112857b952d6SSatish Balay } 112957b952d6SSatish Balay PetscFree(locrowlens); 113057b952d6SSatish Balay PetscFree(buf); 113157b952d6SSatish Balay PetscFree(ibuf); 113257b952d6SSatish Balay PetscFree(rowners); 113357b952d6SSatish Balay PetscFree(dlens); 1134cee3aa6bSSatish Balay PetscFree(mask); 113557b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 113657b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 113757b952d6SSatish Balay return 0; 113857b952d6SSatish Balay } 113957b952d6SSatish Balay 114057b952d6SSatish Balay 1141