179bdfe76SSatish Balay #ifndef lint 2*acdf5bf4SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.5 1996/06/24 22:18:52 balay Exp balay $"; 379bdfe76SSatish Balay #endif 479bdfe76SSatish Balay 579bdfe76SSatish Balay #include "mpibaij.h" 679bdfe76SSatish Balay 779bdfe76SSatish Balay 857b952d6SSatish Balay #include "draw.h" 957b952d6SSatish Balay #include "pinclude/pviewer.h" 1057b952d6SSatish Balay 1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat); 1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat); 1357b952d6SSatish Balay 1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column 1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local 1657b952d6SSatish Balay storage of the matrix. This is done in a non scable way since the 1757b952d6SSatish Balay length of colmap equals the global matrix length. 1857b952d6SSatish Balay */ 1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat) 2057b952d6SSatish Balay { 2157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 2257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 2357b952d6SSatish Balay int nbs = B->nbs,i; 2457b952d6SSatish Balay 2557b952d6SSatish Balay baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap); 2657b952d6SSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 2757b952d6SSatish Balay PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 2857b952d6SSatish Balay for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i; 2957b952d6SSatish Balay return 0; 3057b952d6SSatish Balay } 3157b952d6SSatish Balay 3257b952d6SSatish Balay 33*acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm) 3457b952d6SSatish Balay { 3557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 3657b952d6SSatish Balay int ierr; 3757b952d6SSatish Balay if (baij->size == 1) { 3857b952d6SSatish Balay ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr); 3957b952d6SSatish Balay } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel"); 4057b952d6SSatish Balay return 0; 4157b952d6SSatish Balay } 4257b952d6SSatish Balay 4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 4457b952d6SSatish Balay { 4557b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 4657b952d6SSatish Balay Scalar value; 4757b952d6SSatish Balay int ierr,i,j, rstart = baij->rstart, rend = baij->rend; 4857b952d6SSatish Balay int cstart = baij->cstart, cend = baij->cend,row,col; 4957b952d6SSatish Balay int roworiented = baij->roworiented,rstart_orig,rend_orig; 5057b952d6SSatish Balay int cstart_orig,cend_orig,bs=baij->bs; 5157b952d6SSatish Balay 5257b952d6SSatish Balay if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) { 5357b952d6SSatish Balay SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds"); 5457b952d6SSatish Balay } 5557b952d6SSatish Balay baij->insertmode = addv; 5657b952d6SSatish Balay rstart_orig = rstart*bs; 5757b952d6SSatish Balay rend_orig = rend*bs; 5857b952d6SSatish Balay cstart_orig = cstart*bs; 5957b952d6SSatish Balay cend_orig = cend*bs; 6057b952d6SSatish Balay for ( i=0; i<m; i++ ) { 6157b952d6SSatish Balay if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row"); 6257b952d6SSatish Balay if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large"); 6357b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 6457b952d6SSatish Balay row = im[i] - rstart_orig; 6557b952d6SSatish Balay for ( j=0; j<n; j++ ) { 6657b952d6SSatish Balay if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column"); 6757b952d6SSatish Balay if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large"); 6857b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ 6957b952d6SSatish Balay col = in[j] - cstart_orig; 7057b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 7157b952d6SSatish Balay ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 7257b952d6SSatish Balay } 7357b952d6SSatish Balay else { 7457b952d6SSatish Balay if (mat->was_assembled) { 7557b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 7657b952d6SSatish Balay col = baij->colmap[in[j]]; 7757b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 7857b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 7957b952d6SSatish Balay col = in[j]; 8057b952d6SSatish Balay } 8157b952d6SSatish Balay } 8257b952d6SSatish Balay else col = in[j]; 8357b952d6SSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 8457b952d6SSatish Balay ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); 8557b952d6SSatish Balay } 8657b952d6SSatish Balay } 8757b952d6SSatish Balay } 8857b952d6SSatish Balay else { 8957b952d6SSatish Balay if (roworiented) { 9057b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 9157b952d6SSatish Balay } 9257b952d6SSatish Balay else { 9357b952d6SSatish Balay row = im[i]; 9457b952d6SSatish Balay for ( j=0; j<n; j++ ) { 9557b952d6SSatish Balay ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 9657b952d6SSatish Balay } 9757b952d6SSatish Balay } 9857b952d6SSatish Balay } 9957b952d6SSatish Balay } 10057b952d6SSatish Balay return 0; 10157b952d6SSatish Balay } 10257b952d6SSatish Balay 103d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 104d6de1c52SSatish Balay { 105d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 106d6de1c52SSatish Balay int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 107d6de1c52SSatish Balay int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 108d6de1c52SSatish Balay 109d6de1c52SSatish Balay for ( i=0; i<m; i++ ) { 110d6de1c52SSatish Balay if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row"); 111d6de1c52SSatish Balay if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large"); 112d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 113d6de1c52SSatish Balay row = idxm[i] - bsrstart; 114d6de1c52SSatish Balay for ( j=0; j<n; j++ ) { 115d6de1c52SSatish Balay if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column"); 116d6de1c52SSatish Balay if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large"); 117d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 118d6de1c52SSatish Balay col = idxn[j] - bscstart; 119d6de1c52SSatish Balay ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 120d6de1c52SSatish Balay } 121d6de1c52SSatish Balay else { 122*acdf5bf4SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);} 123d6de1c52SSatish Balay col = baij->colmap[idxn[j]/bs]*bs + col%bs; 124d6de1c52SSatish Balay ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 125d6de1c52SSatish Balay } 126d6de1c52SSatish Balay } 127d6de1c52SSatish Balay } 128d6de1c52SSatish Balay else { 129d6de1c52SSatish Balay SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported"); 130d6de1c52SSatish Balay } 131d6de1c52SSatish Balay } 132d6de1c52SSatish Balay return 0; 133d6de1c52SSatish Balay } 134d6de1c52SSatish Balay 135d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 136d6de1c52SSatish Balay { 137d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 138d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 139*acdf5bf4SSatish Balay int ierr, i,bs2=baij->bs2; 140d6de1c52SSatish Balay double sum = 0.0; 141d6de1c52SSatish Balay Scalar *v; 142d6de1c52SSatish Balay 143d6de1c52SSatish Balay if (baij->size == 1) { 144d6de1c52SSatish Balay ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 145d6de1c52SSatish Balay } else { 146d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 147d6de1c52SSatish Balay v = amat->a; 148d6de1c52SSatish Balay for (i=0; i<amat->nz*bs2; i++ ) { 149d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 150d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 151d6de1c52SSatish Balay #else 152d6de1c52SSatish Balay sum += (*v)*(*v); v++; 153d6de1c52SSatish Balay #endif 154d6de1c52SSatish Balay } 155d6de1c52SSatish Balay v = bmat->a; 156d6de1c52SSatish Balay for (i=0; i<bmat->nz*bs2; i++ ) { 157d6de1c52SSatish Balay #if defined(PETSC_COMPLEX) 158d6de1c52SSatish Balay sum += real(conj(*v)*(*v)); v++; 159d6de1c52SSatish Balay #else 160d6de1c52SSatish Balay sum += (*v)*(*v); v++; 161d6de1c52SSatish Balay #endif 162d6de1c52SSatish Balay } 163d6de1c52SSatish Balay MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm); 164d6de1c52SSatish Balay *norm = sqrt(*norm); 165d6de1c52SSatish Balay } 166*acdf5bf4SSatish Balay else 167*acdf5bf4SSatish Balay SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 168d6de1c52SSatish Balay } 169d6de1c52SSatish Balay return 0; 170d6de1c52SSatish Balay } 17157b952d6SSatish Balay 17257b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 17357b952d6SSatish Balay { 17457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17557b952d6SSatish Balay MPI_Comm comm = mat->comm; 17657b952d6SSatish Balay int size = baij->size, *owners = baij->rowners,bs=baij->bs; 17757b952d6SSatish Balay int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 17857b952d6SSatish Balay MPI_Request *send_waits,*recv_waits; 17957b952d6SSatish Balay int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 18057b952d6SSatish Balay InsertMode addv; 18157b952d6SSatish Balay Scalar *rvalues,*svalues; 18257b952d6SSatish Balay 18357b952d6SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 18457b952d6SSatish Balay MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 18557b952d6SSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 18657b952d6SSatish Balay SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added"); 18757b952d6SSatish Balay } 18857b952d6SSatish Balay baij->insertmode = addv; /* in case this processor had no cache */ 18957b952d6SSatish Balay 19057b952d6SSatish Balay /* first count number of contributors to each processor */ 19157b952d6SSatish Balay nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 19257b952d6SSatish Balay PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 19357b952d6SSatish Balay owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 19457b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 19557b952d6SSatish Balay idx = baij->stash.idx[i]; 19657b952d6SSatish Balay for ( j=0; j<size; j++ ) { 19757b952d6SSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 19857b952d6SSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; break; 19957b952d6SSatish Balay } 20057b952d6SSatish Balay } 20157b952d6SSatish Balay } 20257b952d6SSatish Balay nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 20357b952d6SSatish Balay 20457b952d6SSatish Balay /* inform other processors of number of messages and max length*/ 20557b952d6SSatish Balay work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 20657b952d6SSatish Balay MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 20757b952d6SSatish Balay nreceives = work[rank]; 20857b952d6SSatish Balay MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 20957b952d6SSatish Balay nmax = work[rank]; 21057b952d6SSatish Balay PetscFree(work); 21157b952d6SSatish Balay 21257b952d6SSatish Balay /* post receives: 21357b952d6SSatish Balay 1) each message will consist of ordered pairs 21457b952d6SSatish Balay (global index,value) we store the global index as a double 21557b952d6SSatish Balay to simplify the message passing. 21657b952d6SSatish Balay 2) since we don't know how long each individual message is we 21757b952d6SSatish Balay allocate the largest needed buffer for each receive. Potentially 21857b952d6SSatish Balay this is a lot of wasted space. 21957b952d6SSatish Balay 22057b952d6SSatish Balay 22157b952d6SSatish Balay This could be done better. 22257b952d6SSatish Balay */ 22357b952d6SSatish Balay rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 22457b952d6SSatish Balay CHKPTRQ(rvalues); 22557b952d6SSatish Balay recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 22657b952d6SSatish Balay CHKPTRQ(recv_waits); 22757b952d6SSatish Balay for ( i=0; i<nreceives; i++ ) { 22857b952d6SSatish Balay MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 22957b952d6SSatish Balay comm,recv_waits+i); 23057b952d6SSatish Balay } 23157b952d6SSatish Balay 23257b952d6SSatish Balay /* do sends: 23357b952d6SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 23457b952d6SSatish Balay the ith processor 23557b952d6SSatish Balay */ 23657b952d6SSatish Balay svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 23757b952d6SSatish Balay send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 23857b952d6SSatish Balay CHKPTRQ(send_waits); 23957b952d6SSatish Balay starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 24057b952d6SSatish Balay starts[0] = 0; 24157b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 24257b952d6SSatish Balay for ( i=0; i<baij->stash.n; i++ ) { 24357b952d6SSatish Balay svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 24457b952d6SSatish Balay svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 24557b952d6SSatish Balay svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 24657b952d6SSatish Balay } 24757b952d6SSatish Balay PetscFree(owner); 24857b952d6SSatish Balay starts[0] = 0; 24957b952d6SSatish Balay for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 25057b952d6SSatish Balay count = 0; 25157b952d6SSatish Balay for ( i=0; i<size; i++ ) { 25257b952d6SSatish Balay if (procs[i]) { 25357b952d6SSatish Balay MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 25457b952d6SSatish Balay comm,send_waits+count++); 25557b952d6SSatish Balay } 25657b952d6SSatish Balay } 25757b952d6SSatish Balay PetscFree(starts); PetscFree(nprocs); 25857b952d6SSatish Balay 25957b952d6SSatish Balay /* Free cache space */ 26057b952d6SSatish Balay PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n); 26157b952d6SSatish Balay ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 26257b952d6SSatish Balay 26357b952d6SSatish Balay baij->svalues = svalues; baij->rvalues = rvalues; 26457b952d6SSatish Balay baij->nsends = nsends; baij->nrecvs = nreceives; 26557b952d6SSatish Balay baij->send_waits = send_waits; baij->recv_waits = recv_waits; 26657b952d6SSatish Balay baij->rmax = nmax; 26757b952d6SSatish Balay 26857b952d6SSatish Balay return 0; 26957b952d6SSatish Balay } 27057b952d6SSatish Balay 27157b952d6SSatish Balay 27257b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 27357b952d6SSatish Balay { 27457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 27557b952d6SSatish Balay MPI_Status *send_status,recv_status; 27657b952d6SSatish Balay int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 27757b952d6SSatish Balay int bs=baij->bs,row,col,other_disassembled; 27857b952d6SSatish Balay Scalar *values,val; 27957b952d6SSatish Balay InsertMode addv = baij->insertmode; 28057b952d6SSatish Balay 28157b952d6SSatish Balay /* wait on receives */ 28257b952d6SSatish Balay while (count) { 28357b952d6SSatish Balay MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status); 28457b952d6SSatish Balay /* unpack receives into our local space */ 28557b952d6SSatish Balay values = baij->rvalues + 3*imdex*baij->rmax; 28657b952d6SSatish Balay MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 28757b952d6SSatish Balay n = n/3; 28857b952d6SSatish Balay for ( i=0; i<n; i++ ) { 28957b952d6SSatish Balay row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 29057b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 29157b952d6SSatish Balay val = values[3*i+2]; 29257b952d6SSatish Balay if (col >= baij->cstart*bs && col < baij->cend*bs) { 29357b952d6SSatish Balay col -= baij->cstart*bs; 29457b952d6SSatish Balay MatSetValues(baij->A,1,&row,1,&col,&val,addv); 29557b952d6SSatish Balay } 29657b952d6SSatish Balay else { 29757b952d6SSatish Balay if (mat->was_assembled) { 29857b952d6SSatish Balay if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);} 29957b952d6SSatish Balay col = baij->colmap[col/bs]*bs + col%bs; 30057b952d6SSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 30157b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 30257b952d6SSatish Balay col = (int) PetscReal(values[3*i+1]); 30357b952d6SSatish Balay } 30457b952d6SSatish Balay } 30557b952d6SSatish Balay MatSetValues(baij->B,1,&row,1,&col,&val,addv); 30657b952d6SSatish Balay } 30757b952d6SSatish Balay } 30857b952d6SSatish Balay count--; 30957b952d6SSatish Balay } 31057b952d6SSatish Balay PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 31157b952d6SSatish Balay 31257b952d6SSatish Balay /* wait on sends */ 31357b952d6SSatish Balay if (baij->nsends) { 31457b952d6SSatish Balay send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status)); 31557b952d6SSatish Balay CHKPTRQ(send_status); 31657b952d6SSatish Balay MPI_Waitall(baij->nsends,baij->send_waits,send_status); 31757b952d6SSatish Balay PetscFree(send_status); 31857b952d6SSatish Balay } 31957b952d6SSatish Balay PetscFree(baij->send_waits); PetscFree(baij->svalues); 32057b952d6SSatish Balay 32157b952d6SSatish Balay baij->insertmode = NOT_SET_VALUES; 32257b952d6SSatish Balay ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 32357b952d6SSatish Balay ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 32457b952d6SSatish Balay 32557b952d6SSatish Balay /* determine if any processor has disassembled, if so we must 32657b952d6SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 32757b952d6SSatish Balay MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 32857b952d6SSatish Balay if (mat->was_assembled && !other_disassembled) { 32957b952d6SSatish Balay ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 33057b952d6SSatish Balay } 33157b952d6SSatish Balay 33257b952d6SSatish Balay if (!mat->was_assembled && mode == FINAL_ASSEMBLY) { 33357b952d6SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 33457b952d6SSatish Balay } 33557b952d6SSatish Balay ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 33657b952d6SSatish Balay ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 33757b952d6SSatish Balay 33857b952d6SSatish Balay if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 33957b952d6SSatish Balay return 0; 34057b952d6SSatish Balay } 34157b952d6SSatish Balay 34257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 34357b952d6SSatish Balay { 34457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 34557b952d6SSatish Balay int ierr; 34657b952d6SSatish Balay 34757b952d6SSatish Balay if (baij->size == 1) { 34857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 34957b952d6SSatish Balay } 35057b952d6SSatish Balay else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported"); 35157b952d6SSatish Balay return 0; 35257b952d6SSatish Balay } 35357b952d6SSatish Balay 35457b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 35557b952d6SSatish Balay { 35657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 357cee3aa6bSSatish Balay int ierr, format,rank,bs=baij->bs; 35857b952d6SSatish Balay FILE *fd; 35957b952d6SSatish Balay ViewerType vtype; 36057b952d6SSatish Balay 36157b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 36257b952d6SSatish Balay if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 36357b952d6SSatish Balay ierr = ViewerGetFormat(viewer,&format); 36457b952d6SSatish Balay if (format == ASCII_FORMAT_INFO_DETAILED) { 36557b952d6SSatish Balay int nz, nzalloc, mem; 36657b952d6SSatish Balay MPI_Comm_rank(mat->comm,&rank); 36757b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 36857b952d6SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem); 36957b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 37057b952d6SSatish Balay fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 37157b952d6SSatish Balay rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem); 37257b952d6SSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem); 37357b952d6SSatish Balay fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs); 37457b952d6SSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem); 37557b952d6SSatish Balay fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs); 37657b952d6SSatish Balay fflush(fd); 37757b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 37857b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 37957b952d6SSatish Balay return 0; 38057b952d6SSatish Balay } 38157b952d6SSatish Balay else if (format == ASCII_FORMAT_INFO) { 38257b952d6SSatish Balay return 0; 38357b952d6SSatish Balay } 38457b952d6SSatish Balay } 38557b952d6SSatish Balay 38657b952d6SSatish Balay if (vtype == DRAW_VIEWER) { 38757b952d6SSatish Balay Draw draw; 38857b952d6SSatish Balay PetscTruth isnull; 38957b952d6SSatish Balay ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 39057b952d6SSatish Balay ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 39157b952d6SSatish Balay } 39257b952d6SSatish Balay 39357b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER) { 39457b952d6SSatish Balay ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 39557b952d6SSatish Balay PetscSequentialPhaseBegin(mat->comm,1); 39657b952d6SSatish Balay fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 39757b952d6SSatish Balay baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 39857b952d6SSatish Balay baij->cstart*bs,baij->cend*bs); 39957b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 40057b952d6SSatish Balay ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 40157b952d6SSatish Balay fflush(fd); 40257b952d6SSatish Balay PetscSequentialPhaseEnd(mat->comm,1); 40357b952d6SSatish Balay } 40457b952d6SSatish Balay else { 40557b952d6SSatish Balay int size = baij->size; 40657b952d6SSatish Balay rank = baij->rank; 40757b952d6SSatish Balay if (size == 1) { 40857b952d6SSatish Balay ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 40957b952d6SSatish Balay } 41057b952d6SSatish Balay else { 41157b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 41257b952d6SSatish Balay Mat A; 41357b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 41457b952d6SSatish Balay int M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals; 41557b952d6SSatish Balay int mbs=baij->mbs; 41657b952d6SSatish Balay Scalar *a; 41757b952d6SSatish Balay 41857b952d6SSatish Balay if (!rank) { 419cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42057b952d6SSatish Balay CHKERRQ(ierr); 42157b952d6SSatish Balay } 42257b952d6SSatish Balay else { 423cee3aa6bSSatish Balay ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 42457b952d6SSatish Balay CHKERRQ(ierr); 42557b952d6SSatish Balay } 42657b952d6SSatish Balay PLogObjectParent(mat,A); 42757b952d6SSatish Balay 42857b952d6SSatish Balay /* copy over the A part */ 42957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->A->data; 43057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 43157b952d6SSatish Balay row = baij->rstart; 43257b952d6SSatish Balay rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 43357b952d6SSatish Balay 43457b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 43557b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 43657b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 43757b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 43857b952d6SSatish Balay col = (baij->cstart+aj[j])*bs; 43957b952d6SSatish Balay for (k=0; k<bs; k++ ) { 440cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 441cee3aa6bSSatish Balay col++; a += bs; 44257b952d6SSatish Balay } 44357b952d6SSatish Balay } 44457b952d6SSatish Balay } 44557b952d6SSatish Balay /* copy over the B part */ 44657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*) baij->B->data; 44757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 44857b952d6SSatish Balay row = baij->rstart*bs; 44957b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 45057b952d6SSatish Balay rvals[0] = bs*(baij->rstart + i); 45157b952d6SSatish Balay for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 45257b952d6SSatish Balay for ( j=ai[i]; j<ai[i+1]; j++ ) { 45357b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 45457b952d6SSatish Balay for (k=0; k<bs; k++ ) { 455cee3aa6bSSatish Balay ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 456cee3aa6bSSatish Balay col++; a += bs; 45757b952d6SSatish Balay } 45857b952d6SSatish Balay } 45957b952d6SSatish Balay } 46057b952d6SSatish Balay PetscFree(rvals); 46157b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 46257b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 46357b952d6SSatish Balay if (!rank) { 46457b952d6SSatish Balay ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 46557b952d6SSatish Balay } 46657b952d6SSatish Balay ierr = MatDestroy(A); CHKERRQ(ierr); 46757b952d6SSatish Balay } 46857b952d6SSatish Balay } 46957b952d6SSatish Balay return 0; 47057b952d6SSatish Balay } 47157b952d6SSatish Balay 47257b952d6SSatish Balay 47357b952d6SSatish Balay 47457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 47557b952d6SSatish Balay { 47657b952d6SSatish Balay Mat mat = (Mat) obj; 47757b952d6SSatish Balay int ierr; 47857b952d6SSatish Balay ViewerType vtype; 47957b952d6SSatish Balay 48057b952d6SSatish Balay if (!viewer) { 48157b952d6SSatish Balay viewer = STDOUT_VIEWER_SELF; 48257b952d6SSatish Balay } 48357b952d6SSatish Balay ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 48457b952d6SSatish Balay if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 48557b952d6SSatish Balay vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 48657b952d6SSatish Balay ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 48757b952d6SSatish Balay } 48857b952d6SSatish Balay else if (vtype == BINARY_FILE_VIEWER) { 48957b952d6SSatish Balay return MatView_MPIBAIJ_Binary(mat,viewer); 49057b952d6SSatish Balay } 49157b952d6SSatish Balay return 0; 49257b952d6SSatish Balay } 49357b952d6SSatish Balay 49479bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 49579bdfe76SSatish Balay { 49679bdfe76SSatish Balay Mat mat = (Mat) obj; 49779bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 49879bdfe76SSatish Balay int ierr; 49979bdfe76SSatish Balay 50079bdfe76SSatish Balay #if defined(PETSC_LOG) 50179bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 50279bdfe76SSatish Balay #endif 50379bdfe76SSatish Balay 50479bdfe76SSatish Balay PetscFree(baij->rowners); 50579bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 50679bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 50779bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 50879bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 50979bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 51079bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 51179bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 51279bdfe76SSatish Balay PetscFree(baij); 51379bdfe76SSatish Balay PLogObjectDestroy(mat); 51479bdfe76SSatish Balay PetscHeaderDestroy(mat); 51579bdfe76SSatish Balay return 0; 51679bdfe76SSatish Balay } 51779bdfe76SSatish Balay 518cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 519cee3aa6bSSatish Balay { 520cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 521cee3aa6bSSatish Balay int ierr; 522cee3aa6bSSatish Balay 523cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 524cee3aa6bSSatish Balay ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 525cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr); 526cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 527cee3aa6bSSatish Balay return 0; 528cee3aa6bSSatish Balay } 529cee3aa6bSSatish Balay 530cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 531cee3aa6bSSatish Balay { 532cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 533cee3aa6bSSatish Balay int ierr; 534cee3aa6bSSatish Balay ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 535cee3aa6bSSatish Balay ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 536cee3aa6bSSatish Balay ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr); 537cee3aa6bSSatish Balay ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 538cee3aa6bSSatish Balay return 0; 539cee3aa6bSSatish Balay } 540cee3aa6bSSatish Balay 541cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 542cee3aa6bSSatish Balay { 543cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 544cee3aa6bSSatish Balay int ierr; 545cee3aa6bSSatish Balay 546cee3aa6bSSatish Balay /* do nondiagonal part */ 547cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 548cee3aa6bSSatish Balay /* send it on its way */ 549cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES, 550cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 551cee3aa6bSSatish Balay /* do local part */ 552cee3aa6bSSatish Balay ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 553cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 554cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 555cee3aa6bSSatish Balay /* but is not perhaps always true. */ 556cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES, 557cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 558cee3aa6bSSatish Balay return 0; 559cee3aa6bSSatish Balay } 560cee3aa6bSSatish Balay 561cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 562cee3aa6bSSatish Balay { 563cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 564cee3aa6bSSatish Balay int ierr; 565cee3aa6bSSatish Balay 566cee3aa6bSSatish Balay /* do nondiagonal part */ 567cee3aa6bSSatish Balay ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 568cee3aa6bSSatish Balay /* send it on its way */ 569cee3aa6bSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES, 570cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 571cee3aa6bSSatish Balay /* do local part */ 572cee3aa6bSSatish Balay ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 573cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 574cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 575cee3aa6bSSatish Balay /* but is not perhaps always true. */ 576cee3aa6bSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES, 577cee3aa6bSSatish Balay (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr); 578cee3aa6bSSatish Balay return 0; 579cee3aa6bSSatish Balay } 580cee3aa6bSSatish Balay 581cee3aa6bSSatish Balay /* 582cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 583cee3aa6bSSatish Balay diagonal block 584cee3aa6bSSatish Balay */ 585cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 586cee3aa6bSSatish Balay { 587cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 588cee3aa6bSSatish Balay if (a->M != a->N) 589cee3aa6bSSatish Balay SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block"); 590cee3aa6bSSatish Balay return MatGetDiagonal(a->A,v); 591cee3aa6bSSatish Balay } 592cee3aa6bSSatish Balay 593cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A) 594cee3aa6bSSatish Balay { 595cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 596cee3aa6bSSatish Balay int ierr; 597cee3aa6bSSatish Balay ierr = MatScale(aa,a->A); CHKERRQ(ierr); 598cee3aa6bSSatish Balay ierr = MatScale(aa,a->B); CHKERRQ(ierr); 599cee3aa6bSSatish Balay return 0; 600cee3aa6bSSatish Balay } 60157b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 60257b952d6SSatish Balay { 60357b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 60457b952d6SSatish Balay *m = mat->M; *n = mat->N; 60557b952d6SSatish Balay return 0; 60657b952d6SSatish Balay } 60757b952d6SSatish Balay 60857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 60957b952d6SSatish Balay { 61057b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 61157b952d6SSatish Balay *m = mat->m; *n = mat->N; 61257b952d6SSatish Balay return 0; 61357b952d6SSatish Balay } 61457b952d6SSatish Balay 61557b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 61657b952d6SSatish Balay { 61757b952d6SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 61857b952d6SSatish Balay *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 61957b952d6SSatish Balay return 0; 62057b952d6SSatish Balay } 62157b952d6SSatish Balay 622*acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 623*acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 624*acdf5bf4SSatish Balay 625*acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 626*acdf5bf4SSatish Balay { 627*acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 628*acdf5bf4SSatish Balay Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 629*acdf5bf4SSatish Balay int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 630*acdf5bf4SSatish Balay int nztot, nzA, nzB, lrow, rstart = mat->rstart*bs, rend = mat->rend*bs; 631*acdf5bf4SSatish Balay int *cmap, *idx_p,cstart = mat->cstart*bs; 632*acdf5bf4SSatish Balay 633*acdf5bf4SSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active"); 634*acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 635*acdf5bf4SSatish Balay 636*acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 637*acdf5bf4SSatish Balay /* 638*acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 639*acdf5bf4SSatish Balay */ 640*acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 641*acdf5bf4SSatish Balay int max = 1,nbs = mat->nbs,tmp; 642*acdf5bf4SSatish Balay for ( i=0; i<nbs; i++ ) { 643*acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 644*acdf5bf4SSatish Balay if (max < tmp) { max = tmp; } 645*acdf5bf4SSatish Balay } 646*acdf5bf4SSatish Balay mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 647*acdf5bf4SSatish Balay CHKPTRQ(mat->rowvalues); 648*acdf5bf4SSatish Balay mat->rowindices = (int *) (mat->rowvalues + max*bs2); 649*acdf5bf4SSatish Balay } 650*acdf5bf4SSatish Balay 651*acdf5bf4SSatish Balay 652*acdf5bf4SSatish Balay if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows") 653*acdf5bf4SSatish Balay lrow = row - rstart; 654*acdf5bf4SSatish Balay 655*acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 656*acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 657*acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 658*acdf5bf4SSatish Balay ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 659*acdf5bf4SSatish Balay ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 660*acdf5bf4SSatish Balay nztot = nzA + nzB; 661*acdf5bf4SSatish Balay 662*acdf5bf4SSatish Balay cmap = mat->garray; 663*acdf5bf4SSatish Balay if (v || idx) { 664*acdf5bf4SSatish Balay if (nztot) { 665*acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 666*acdf5bf4SSatish Balay int imark = -1; 667*acdf5bf4SSatish Balay if (v) { 668*acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 669*acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 670*acdf5bf4SSatish Balay if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 671*acdf5bf4SSatish Balay else break; 672*acdf5bf4SSatish Balay } 673*acdf5bf4SSatish Balay imark = i; 674*acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 675*acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 676*acdf5bf4SSatish Balay } 677*acdf5bf4SSatish Balay if (idx) { 678*acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 679*acdf5bf4SSatish Balay if (imark > -1) { 680*acdf5bf4SSatish Balay for ( i=0; i<imark; i++ ) { 681*acdf5bf4SSatish Balay idx_p[i] = cmap[cworkB[i]]; 682*acdf5bf4SSatish Balay } 683*acdf5bf4SSatish Balay } else { 684*acdf5bf4SSatish Balay for ( i=0; i<nzB; i++ ) { 685*acdf5bf4SSatish Balay if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 686*acdf5bf4SSatish Balay else break; 687*acdf5bf4SSatish Balay } 688*acdf5bf4SSatish Balay imark = i; 689*acdf5bf4SSatish Balay } 690*acdf5bf4SSatish Balay for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 691*acdf5bf4SSatish Balay for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 692*acdf5bf4SSatish Balay } 693*acdf5bf4SSatish Balay } 694*acdf5bf4SSatish Balay else {*idx = 0; *v=0;} 695*acdf5bf4SSatish Balay } 696*acdf5bf4SSatish Balay *nz = nztot; 697*acdf5bf4SSatish Balay ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 698*acdf5bf4SSatish Balay ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 699*acdf5bf4SSatish Balay return 0; 700*acdf5bf4SSatish Balay } 701*acdf5bf4SSatish Balay 702*acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 703*acdf5bf4SSatish Balay { 704*acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 705*acdf5bf4SSatish Balay if (baij->getrowactive == PETSC_FALSE) { 706*acdf5bf4SSatish Balay SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called"); 707*acdf5bf4SSatish Balay } 708*acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 709*acdf5bf4SSatish Balay return 0; 710*acdf5bf4SSatish Balay } 711*acdf5bf4SSatish Balay 71279bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 71379bdfe76SSatish Balay static struct _MatOps MatOps = { 714cee3aa6bSSatish Balay MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ, 715cee3aa6bSSatish Balay MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0, 71679bdfe76SSatish Balay 0,0,0,0, 71779bdfe76SSatish Balay 0,0,0,0, 718*acdf5bf4SSatish Balay 0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ, 71957b952d6SSatish Balay MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0, 72057b952d6SSatish Balay 0,0,MatGetReordering_MPIBAIJ,0, 72157b952d6SSatish Balay 0,0,0,MatGetSize_MPIBAIJ, 72257b952d6SSatish Balay MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0, 72379bdfe76SSatish Balay 0,0,0,0, 72479bdfe76SSatish Balay 0,0,0,0, 72579bdfe76SSatish Balay 0,0,0,0, 726*acdf5bf4SSatish Balay 0,MatGetValues_MPIBAIJ,0,0, 727cee3aa6bSSatish Balay MatScale_MPIBAIJ,0,0}; 72879bdfe76SSatish Balay 72979bdfe76SSatish Balay 73079bdfe76SSatish Balay /*@C 73179bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 73279bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 73379bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 73479bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 73579bdfe76SSatish Balay performance can be increased by more than a factor of 50. 73679bdfe76SSatish Balay 73779bdfe76SSatish Balay Input Parameters: 73879bdfe76SSatish Balay . comm - MPI communicator 73979bdfe76SSatish Balay . bs - size of blockk 74079bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 74179bdfe76SSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated 74279bdfe76SSatish Balay if N is given) 74379bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 74479bdfe76SSatish Balay . N - number of global columns (or PETSC_DECIDE to have calculated 74579bdfe76SSatish Balay if n is given) 74679bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 74779bdfe76SSatish Balay submatrix (same for all local rows) 74879bdfe76SSatish Balay . d_nzz - number of block nonzeros per block row in diagonal portion of local 74979bdfe76SSatish Balay submatrix or null (possibly different for each row). You must leave 75079bdfe76SSatish Balay room for the diagonal entry even if it is zero. 75179bdfe76SSatish Balay . o_nz - number of block nonzeros per block row in off-diagonal portion of local 75279bdfe76SSatish Balay submatrix (same for all local rows). 75379bdfe76SSatish Balay . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 75479bdfe76SSatish Balay submatrix or null (possibly different for each row). 75579bdfe76SSatish Balay 75679bdfe76SSatish Balay Output Parameter: 75779bdfe76SSatish Balay . A - the matrix 75879bdfe76SSatish Balay 75979bdfe76SSatish Balay Notes: 76079bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 76179bdfe76SSatish Balay (possibly both). 76279bdfe76SSatish Balay 76379bdfe76SSatish Balay Storage Information: 76479bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 76579bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 76679bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 76779bdfe76SSatish Balay local matrix (a rectangular submatrix). 76879bdfe76SSatish Balay 76979bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 77079bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 77179bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 77279bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 77379bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 77479bdfe76SSatish Balay 77579bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 77679bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 77779bdfe76SSatish Balay 77879bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 77979bdfe76SSatish Balay $ ------------------- 78079bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 78179bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 78279bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 78379bdfe76SSatish Balay $ ------------------- 78479bdfe76SSatish Balay $ 78579bdfe76SSatish Balay 78679bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 78779bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 78879bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 78957b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 79079bdfe76SSatish Balay 79179bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 79279bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 79379bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 79479bdfe76SSatish Balay one expects d_nz >> o_nz. For additional details, see the users manual 79579bdfe76SSatish Balay chapter on matrices and the file $(PETSC_DIR)/Performance. 79679bdfe76SSatish Balay 79779bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel 79879bdfe76SSatish Balay 79979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 80079bdfe76SSatish Balay @*/ 80179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 80279bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 80379bdfe76SSatish Balay { 80479bdfe76SSatish Balay Mat B; 80579bdfe76SSatish Balay Mat_MPIBAIJ *b; 806cee3aa6bSSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE; 80779bdfe76SSatish Balay 80879bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 80979bdfe76SSatish Balay *A = 0; 81079bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 81179bdfe76SSatish Balay PLogObjectCreate(B); 81279bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 81379bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 81479bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 81579bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 81679bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 81757b952d6SSatish Balay 81879bdfe76SSatish Balay B->factor = 0; 81979bdfe76SSatish Balay B->assembled = PETSC_FALSE; 82079bdfe76SSatish Balay 82179bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 82279bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 82379bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 82479bdfe76SSatish Balay 82579bdfe76SSatish Balay if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 82657b952d6SSatish Balay SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 827cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified"); 828cee3aa6bSSatish Balay if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified"); 829cee3aa6bSSatish Balay if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 830cee3aa6bSSatish Balay if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE; 83179bdfe76SSatish Balay 83279bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 83379bdfe76SSatish Balay work[0] = m; work[1] = n; 83479bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 83579bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 83679bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 83779bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 83879bdfe76SSatish Balay } 83979bdfe76SSatish Balay if (m == PETSC_DECIDE) { 84079bdfe76SSatish Balay Mbs = M/bs; 84179bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 84279bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 84379bdfe76SSatish Balay m = mbs*bs; 84479bdfe76SSatish Balay } 84579bdfe76SSatish Balay if (n == PETSC_DECIDE) { 84679bdfe76SSatish Balay Nbs = N/bs; 84779bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 84879bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 84979bdfe76SSatish Balay n = nbs*bs; 85079bdfe76SSatish Balay } 851cee3aa6bSSatish Balay if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 85279bdfe76SSatish Balay 85379bdfe76SSatish Balay b->m = m; B->m = m; 85479bdfe76SSatish Balay b->n = n; B->n = n; 85579bdfe76SSatish Balay b->N = N; B->N = N; 85679bdfe76SSatish Balay b->M = M; B->M = M; 85779bdfe76SSatish Balay b->bs = bs; 85879bdfe76SSatish Balay b->bs2 = bs*bs; 85979bdfe76SSatish Balay b->mbs = mbs; 86079bdfe76SSatish Balay b->nbs = nbs; 86179bdfe76SSatish Balay b->Mbs = Mbs; 86279bdfe76SSatish Balay b->Nbs = Nbs; 86379bdfe76SSatish Balay 86479bdfe76SSatish Balay /* build local table of row and column ownerships */ 86579bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 86679bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 86779bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 86879bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 86979bdfe76SSatish Balay b->rowners[0] = 0; 87079bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 87179bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 87279bdfe76SSatish Balay } 87379bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 87479bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 87579bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 87679bdfe76SSatish Balay b->cowners[0] = 0; 87779bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 87879bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 87979bdfe76SSatish Balay } 88079bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 88179bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 88279bdfe76SSatish Balay 88379bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 88479bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 88579bdfe76SSatish Balay PLogObjectParent(B,b->A); 88679bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 88779bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 88879bdfe76SSatish Balay PLogObjectParent(B,b->B); 88979bdfe76SSatish Balay 89079bdfe76SSatish Balay /* build cache for off array entries formed */ 89179bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 89279bdfe76SSatish Balay b->colmap = 0; 89379bdfe76SSatish Balay b->garray = 0; 89479bdfe76SSatish Balay b->roworiented = 1; 89579bdfe76SSatish Balay 89679bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 89779bdfe76SSatish Balay b->lvec = 0; 89879bdfe76SSatish Balay b->Mvctx = 0; 89979bdfe76SSatish Balay 90079bdfe76SSatish Balay /* stuff for MatGetRow() */ 90179bdfe76SSatish Balay b->rowindices = 0; 90279bdfe76SSatish Balay b->rowvalues = 0; 90379bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 90479bdfe76SSatish Balay 90579bdfe76SSatish Balay *A = B; 90679bdfe76SSatish Balay return 0; 90779bdfe76SSatish Balay } 90857b952d6SSatish Balay 90957b952d6SSatish Balay #include "sys.h" 91057b952d6SSatish Balay 91157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 91257b952d6SSatish Balay { 91357b952d6SSatish Balay Mat A; 91457b952d6SSatish Balay int i, nz, ierr, j,rstart, rend, fd; 91557b952d6SSatish Balay Scalar *vals,*buf; 91657b952d6SSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 91757b952d6SSatish Balay MPI_Status status; 918cee3aa6bSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 91957b952d6SSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 92057b952d6SSatish Balay int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 92157b952d6SSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 92257b952d6SSatish Balay int dcount,kmax,k,nzcount,tmp; 92357b952d6SSatish Balay 92457b952d6SSatish Balay 92557b952d6SSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 92657b952d6SSatish Balay bs2 = bs*bs; 92757b952d6SSatish Balay 92857b952d6SSatish Balay MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 92957b952d6SSatish Balay if (!rank) { 93057b952d6SSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 93157b952d6SSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr); 932cee3aa6bSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object"); 93357b952d6SSatish Balay } 93457b952d6SSatish Balay 93557b952d6SSatish Balay MPI_Bcast(header+1,3,MPI_INT,0,comm); 93657b952d6SSatish Balay M = header[1]; N = header[2]; 93757b952d6SSatish Balay 93857b952d6SSatish Balay 93957b952d6SSatish Balay if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 94057b952d6SSatish Balay 94157b952d6SSatish Balay /* 94257b952d6SSatish Balay This code adds extra rows to make sure the number of rows is 94357b952d6SSatish Balay divisible by the blocksize 94457b952d6SSatish Balay */ 94557b952d6SSatish Balay Mbs = M/bs; 94657b952d6SSatish Balay extra_rows = bs - M + bs*(Mbs); 94757b952d6SSatish Balay if (extra_rows == bs) extra_rows = 0; 94857b952d6SSatish Balay else Mbs++; 94957b952d6SSatish Balay if (extra_rows &&!rank) { 95057b952d6SSatish Balay PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 95157b952d6SSatish Balay } 95257b952d6SSatish Balay /* determine ownership of all rows */ 95357b952d6SSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 95457b952d6SSatish Balay m = mbs * bs; 955cee3aa6bSSatish Balay rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 956cee3aa6bSSatish Balay browners = rowners + size + 1; 95757b952d6SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm); 95857b952d6SSatish Balay rowners[0] = 0; 959cee3aa6bSSatish Balay for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 960cee3aa6bSSatish Balay for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 96157b952d6SSatish Balay rstart = rowners[rank]; 96257b952d6SSatish Balay rend = rowners[rank+1]; 96357b952d6SSatish Balay 96457b952d6SSatish Balay /* distribute row lengths to all processors */ 96557b952d6SSatish Balay locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 96657b952d6SSatish Balay if (!rank) { 96757b952d6SSatish Balay rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 96857b952d6SSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 96957b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 97057b952d6SSatish Balay sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 971cee3aa6bSSatish Balay for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 972cee3aa6bSSatish Balay MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm); 97357b952d6SSatish Balay PetscFree(sndcounts); 97457b952d6SSatish Balay } 97557b952d6SSatish Balay else { 97657b952d6SSatish Balay MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm); 97757b952d6SSatish Balay } 97857b952d6SSatish Balay 97957b952d6SSatish Balay if (!rank) { 98057b952d6SSatish Balay /* calculate the number of nonzeros on each processor */ 98157b952d6SSatish Balay procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 98257b952d6SSatish Balay PetscMemzero(procsnz,size*sizeof(int)); 98357b952d6SSatish Balay for ( i=0; i<size; i++ ) { 98457b952d6SSatish Balay for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 98557b952d6SSatish Balay procsnz[i] += rowlengths[j]; 98657b952d6SSatish Balay } 98757b952d6SSatish Balay } 98857b952d6SSatish Balay PetscFree(rowlengths); 98957b952d6SSatish Balay 99057b952d6SSatish Balay /* determine max buffer needed and allocate it */ 99157b952d6SSatish Balay maxnz = 0; 99257b952d6SSatish Balay for ( i=0; i<size; i++ ) { 99357b952d6SSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 99457b952d6SSatish Balay } 99557b952d6SSatish Balay cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 99657b952d6SSatish Balay 99757b952d6SSatish Balay /* read in my part of the matrix column indices */ 99857b952d6SSatish Balay nz = procsnz[0]; 99957b952d6SSatish Balay ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 100057b952d6SSatish Balay mycols = ibuf; 1001cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 100257b952d6SSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr); 1003cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 1004cee3aa6bSSatish Balay 100557b952d6SSatish Balay /* read in every ones (except the last) and ship off */ 100657b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 100757b952d6SSatish Balay nz = procsnz[i]; 100857b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 100957b952d6SSatish Balay MPI_Send(cols,nz,MPI_INT,i,tag,comm); 101057b952d6SSatish Balay } 101157b952d6SSatish Balay /* read in the stuff for the last proc */ 101257b952d6SSatish Balay if ( size != 1 ) { 101357b952d6SSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 101457b952d6SSatish Balay ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr); 101557b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 1016cee3aa6bSSatish Balay MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm); 101757b952d6SSatish Balay } 101857b952d6SSatish Balay PetscFree(cols); 101957b952d6SSatish Balay } 102057b952d6SSatish Balay else { 102157b952d6SSatish Balay /* determine buffer space needed for message */ 102257b952d6SSatish Balay nz = 0; 102357b952d6SSatish Balay for ( i=0; i<m; i++ ) { 102457b952d6SSatish Balay nz += locrowlens[i]; 102557b952d6SSatish Balay } 102657b952d6SSatish Balay ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 102757b952d6SSatish Balay mycols = ibuf; 102857b952d6SSatish Balay /* receive message of column indices*/ 102957b952d6SSatish Balay MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); 103057b952d6SSatish Balay MPI_Get_count(&status,MPI_INT,&maxnz); 1031cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 103257b952d6SSatish Balay } 103357b952d6SSatish Balay 103457b952d6SSatish Balay /* loop over local rows, determining number of off diagonal entries */ 1035cee3aa6bSSatish Balay dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 1036cee3aa6bSSatish Balay odlens = dlens + (rend-rstart); 103757b952d6SSatish Balay mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 1038cee3aa6bSSatish Balay PetscMemzero(mask,3*Mbs*sizeof(int)); 103957b952d6SSatish Balay masked1 = mask + Mbs; 104057b952d6SSatish Balay masked2 = masked1 + Mbs; 104157b952d6SSatish Balay rowcount = 0; nzcount = 0; 104257b952d6SSatish Balay for ( i=0; i<mbs; i++ ) { 104357b952d6SSatish Balay dcount = 0; 104457b952d6SSatish Balay odcount = 0; 104557b952d6SSatish Balay for ( j=0; j<bs; j++ ) { 104657b952d6SSatish Balay kmax = locrowlens[rowcount]; 104757b952d6SSatish Balay for ( k=0; k<kmax; k++ ) { 104857b952d6SSatish Balay tmp = mycols[nzcount++]/bs; 104957b952d6SSatish Balay if (!mask[tmp]) { 105057b952d6SSatish Balay mask[tmp] = 1; 105157b952d6SSatish Balay if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 105257b952d6SSatish Balay else masked1[dcount++] = tmp; 105357b952d6SSatish Balay } 105457b952d6SSatish Balay } 105557b952d6SSatish Balay rowcount++; 105657b952d6SSatish Balay } 1057cee3aa6bSSatish Balay 105857b952d6SSatish Balay dlens[i] = dcount; 105957b952d6SSatish Balay odlens[i] = odcount; 1060cee3aa6bSSatish Balay 106157b952d6SSatish Balay /* zero out the mask elements we set */ 106257b952d6SSatish Balay for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 106357b952d6SSatish Balay for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 106457b952d6SSatish Balay } 1065cee3aa6bSSatish Balay 106657b952d6SSatish Balay /* create our matrix */ 106757b952d6SSatish Balay ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 106857b952d6SSatish Balay A = *newmat; 106957b952d6SSatish Balay MatSetOption(A,COLUMNS_SORTED); 107057b952d6SSatish Balay 107157b952d6SSatish Balay if (!rank) { 107257b952d6SSatish Balay buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 107357b952d6SSatish Balay /* read in my part of the matrix numerical values */ 107457b952d6SSatish Balay nz = procsnz[0]; 107557b952d6SSatish Balay vals = buf; 1076cee3aa6bSSatish Balay mycols = ibuf; 1077cee3aa6bSSatish Balay if (size == 1) nz -= extra_rows; 107857b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 1079cee3aa6bSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 108057b952d6SSatish Balay /* insert into matrix */ 108157b952d6SSatish Balay jj = rstart*bs; 108257b952d6SSatish Balay for ( i=0; i<m; i++ ) { 108357b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 108457b952d6SSatish Balay mycols += locrowlens[i]; 108557b952d6SSatish Balay vals += locrowlens[i]; 108657b952d6SSatish Balay jj++; 108757b952d6SSatish Balay } 108857b952d6SSatish Balay /* read in other processors( except the last one) and ship out */ 108957b952d6SSatish Balay for ( i=1; i<size-1; i++ ) { 109057b952d6SSatish Balay nz = procsnz[i]; 109157b952d6SSatish Balay vals = buf; 109257b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 109357b952d6SSatish Balay MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); 109457b952d6SSatish Balay } 109557b952d6SSatish Balay /* the last proc */ 109657b952d6SSatish Balay if ( size != 1 ){ 109757b952d6SSatish Balay nz = procsnz[i] - extra_rows; 1098cee3aa6bSSatish Balay vals = buf; 109957b952d6SSatish Balay ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr); 110057b952d6SSatish Balay for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 1101cee3aa6bSSatish Balay MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm); 110257b952d6SSatish Balay } 110357b952d6SSatish Balay PetscFree(procsnz); 110457b952d6SSatish Balay } 110557b952d6SSatish Balay else { 110657b952d6SSatish Balay /* receive numeric values */ 110757b952d6SSatish Balay buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 110857b952d6SSatish Balay 110957b952d6SSatish Balay /* receive message of values*/ 111057b952d6SSatish Balay vals = buf; 1111cee3aa6bSSatish Balay mycols = ibuf; 111257b952d6SSatish Balay MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); 111357b952d6SSatish Balay MPI_Get_count(&status,MPIU_SCALAR,&maxnz); 1114cee3aa6bSSatish Balay if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file"); 111557b952d6SSatish Balay 111657b952d6SSatish Balay /* insert into matrix */ 111757b952d6SSatish Balay jj = rstart*bs; 1118cee3aa6bSSatish Balay for ( i=0; i<m; i++ ) { 111957b952d6SSatish Balay ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 112057b952d6SSatish Balay mycols += locrowlens[i]; 112157b952d6SSatish Balay vals += locrowlens[i]; 112257b952d6SSatish Balay jj++; 112357b952d6SSatish Balay } 112457b952d6SSatish Balay } 112557b952d6SSatish Balay PetscFree(locrowlens); 112657b952d6SSatish Balay PetscFree(buf); 112757b952d6SSatish Balay PetscFree(ibuf); 112857b952d6SSatish Balay PetscFree(rowners); 112957b952d6SSatish Balay PetscFree(dlens); 1130cee3aa6bSSatish Balay PetscFree(mask); 113157b952d6SSatish Balay ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 113257b952d6SSatish Balay ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr); 113357b952d6SSatish Balay return 0; 113457b952d6SSatish Balay } 113557b952d6SSatish Balay 113657b952d6SSatish Balay 1137