xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bd16c2fe4d4045da961594cb53fb6c4a23bb4ff7)
179bdfe76SSatish Balay #ifndef lint
2*bd16c2feSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.7 1996/07/01 23:20:26 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
1357b952d6SSatish Balay 
1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
1657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
1757b952d6SSatish Balay length of colmap equals the global matrix length.
1857b952d6SSatish Balay */
1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2057b952d6SSatish Balay {
2157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2357b952d6SSatish Balay   int        nbs = B->nbs,i;
2457b952d6SSatish Balay 
2557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
2657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
2757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
2857b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
2957b952d6SSatish Balay   return 0;
3057b952d6SSatish Balay }
3157b952d6SSatish Balay 
3257b952d6SSatish Balay 
33acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   int         ierr;
3757b952d6SSatish Balay   if (baij->size == 1) {
3857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
3957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
4057b952d6SSatish Balay   return 0;
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   Scalar      value;
4757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
4857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
4957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
5057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
5157b952d6SSatish Balay 
5257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
5357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
5457b952d6SSatish Balay   }
5557b952d6SSatish Balay   baij->insertmode = addv;
5657b952d6SSatish Balay   rstart_orig = rstart*bs;
5757b952d6SSatish Balay   rend_orig   = rend*bs;
5857b952d6SSatish Balay   cstart_orig = cstart*bs;
5957b952d6SSatish Balay   cend_orig   = cend*bs;
6057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
6157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
6257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
6357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
6457b952d6SSatish Balay       row = im[i] - rstart_orig;
6557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
6657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
6757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
6857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
6957b952d6SSatish Balay           col = in[j] - cstart_orig;
7057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
7157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
7257b952d6SSatish Balay         }
7357b952d6SSatish Balay         else {
7457b952d6SSatish Balay           if (mat->was_assembled) {
7557b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
7657b952d6SSatish Balay             col = baij->colmap[in[j]];
7757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
7857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
7957b952d6SSatish Balay               col =  in[j];
8057b952d6SSatish Balay             }
8157b952d6SSatish Balay           }
8257b952d6SSatish Balay           else col = in[j];
8357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8457b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8557b952d6SSatish Balay         }
8657b952d6SSatish Balay       }
8757b952d6SSatish Balay     }
8857b952d6SSatish Balay     else {
8957b952d6SSatish Balay       if (roworiented) {
9057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
9157b952d6SSatish Balay       }
9257b952d6SSatish Balay       else {
9357b952d6SSatish Balay         row = im[i];
9457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
9557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
9657b952d6SSatish Balay         }
9757b952d6SSatish Balay       }
9857b952d6SSatish Balay     }
9957b952d6SSatish Balay   }
10057b952d6SSatish Balay   return 0;
10157b952d6SSatish Balay }
10257b952d6SSatish Balay 
103d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
104d6de1c52SSatish Balay {
105d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
106d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
107d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
108d6de1c52SSatish Balay 
109d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
110d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
111d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
112d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
113d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
114d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
115d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
116d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
117d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
118d6de1c52SSatish Balay           col = idxn[j] - bscstart;
119d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
120d6de1c52SSatish Balay         }
121d6de1c52SSatish Balay         else {
122acdf5bf4SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
123d9d09a02SSatish Balay           if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0;
124d9d09a02SSatish Balay           else {
125d9d09a02SSatish Balay             col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs;
126d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
127d6de1c52SSatish Balay           }
128d6de1c52SSatish Balay         }
129d6de1c52SSatish Balay       }
130d9d09a02SSatish Balay     }
131d6de1c52SSatish Balay     else {
132d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
133d6de1c52SSatish Balay     }
134d6de1c52SSatish Balay   }
135d6de1c52SSatish Balay   return 0;
136d6de1c52SSatish Balay }
137d6de1c52SSatish Balay 
138d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
139d6de1c52SSatish Balay {
140d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
141d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
142acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
143d6de1c52SSatish Balay   double     sum = 0.0;
144d6de1c52SSatish Balay   Scalar     *v;
145d6de1c52SSatish Balay 
146d6de1c52SSatish Balay   if (baij->size == 1) {
147d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
148d6de1c52SSatish Balay   } else {
149d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
150d6de1c52SSatish Balay       v = amat->a;
151d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
152d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
153d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
154d6de1c52SSatish Balay #else
155d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
156d6de1c52SSatish Balay #endif
157d6de1c52SSatish Balay       }
158d6de1c52SSatish Balay       v = bmat->a;
159d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
160d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
161d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
162d6de1c52SSatish Balay #else
163d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
164d6de1c52SSatish Balay #endif
165d6de1c52SSatish Balay       }
166d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
167d6de1c52SSatish Balay       *norm = sqrt(*norm);
168d6de1c52SSatish Balay     }
169acdf5bf4SSatish Balay     else
170acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
171d6de1c52SSatish Balay   }
172d6de1c52SSatish Balay   return 0;
173d6de1c52SSatish Balay }
17457b952d6SSatish Balay 
17557b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
17657b952d6SSatish Balay {
17757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
17857b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
17957b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
18057b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
18157b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
18257b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
18357b952d6SSatish Balay   InsertMode  addv;
18457b952d6SSatish Balay   Scalar      *rvalues,*svalues;
18557b952d6SSatish Balay 
18657b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
18757b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
18857b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
18957b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
19057b952d6SSatish Balay   }
19157b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
19257b952d6SSatish Balay 
19357b952d6SSatish Balay   /*  first count number of contributors to each processor */
19457b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
19557b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
19657b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
19757b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
19857b952d6SSatish Balay     idx = baij->stash.idx[i];
19957b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
20057b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
20157b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
20257b952d6SSatish Balay       }
20357b952d6SSatish Balay     }
20457b952d6SSatish Balay   }
20557b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
20657b952d6SSatish Balay 
20757b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
20857b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
20957b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
21057b952d6SSatish Balay   nreceives = work[rank];
21157b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
21257b952d6SSatish Balay   nmax = work[rank];
21357b952d6SSatish Balay   PetscFree(work);
21457b952d6SSatish Balay 
21557b952d6SSatish Balay   /* post receives:
21657b952d6SSatish Balay        1) each message will consist of ordered pairs
21757b952d6SSatish Balay      (global index,value) we store the global index as a double
21857b952d6SSatish Balay      to simplify the message passing.
21957b952d6SSatish Balay        2) since we don't know how long each individual message is we
22057b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
22157b952d6SSatish Balay      this is a lot of wasted space.
22257b952d6SSatish Balay 
22357b952d6SSatish Balay 
22457b952d6SSatish Balay        This could be done better.
22557b952d6SSatish Balay   */
22657b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
22757b952d6SSatish Balay   CHKPTRQ(rvalues);
22857b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
22957b952d6SSatish Balay   CHKPTRQ(recv_waits);
23057b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
23157b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
23257b952d6SSatish Balay               comm,recv_waits+i);
23357b952d6SSatish Balay   }
23457b952d6SSatish Balay 
23557b952d6SSatish Balay   /* do sends:
23657b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
23757b952d6SSatish Balay          the ith processor
23857b952d6SSatish Balay   */
23957b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
24057b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
24157b952d6SSatish Balay   CHKPTRQ(send_waits);
24257b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
24357b952d6SSatish Balay   starts[0] = 0;
24457b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
24557b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
24657b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
24757b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
24857b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
24957b952d6SSatish Balay   }
25057b952d6SSatish Balay   PetscFree(owner);
25157b952d6SSatish Balay   starts[0] = 0;
25257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
25357b952d6SSatish Balay   count = 0;
25457b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
25557b952d6SSatish Balay     if (procs[i]) {
25657b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
25757b952d6SSatish Balay                 comm,send_waits+count++);
25857b952d6SSatish Balay     }
25957b952d6SSatish Balay   }
26057b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
26157b952d6SSatish Balay 
26257b952d6SSatish Balay   /* Free cache space */
26357b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
26457b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
26557b952d6SSatish Balay 
26657b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
26757b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
26857b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
26957b952d6SSatish Balay   baij->rmax       = nmax;
27057b952d6SSatish Balay 
27157b952d6SSatish Balay   return 0;
27257b952d6SSatish Balay }
27357b952d6SSatish Balay 
27457b952d6SSatish Balay 
27557b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
27657b952d6SSatish Balay {
27757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27857b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
27957b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
28057b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
28157b952d6SSatish Balay   Scalar      *values,val;
28257b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
28357b952d6SSatish Balay 
28457b952d6SSatish Balay   /*  wait on receives */
28557b952d6SSatish Balay   while (count) {
28657b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
28757b952d6SSatish Balay     /* unpack receives into our local space */
28857b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
28957b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
29057b952d6SSatish Balay     n = n/3;
29157b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
29257b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
29357b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
29457b952d6SSatish Balay       val = values[3*i+2];
29557b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
29657b952d6SSatish Balay         col -= baij->cstart*bs;
29757b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
29857b952d6SSatish Balay       }
29957b952d6SSatish Balay       else {
30057b952d6SSatish Balay         if (mat->was_assembled) {
30157b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
30257b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
30357b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
30457b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
30557b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
30657b952d6SSatish Balay           }
30757b952d6SSatish Balay         }
30857b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
30957b952d6SSatish Balay       }
31057b952d6SSatish Balay     }
31157b952d6SSatish Balay     count--;
31257b952d6SSatish Balay   }
31357b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
31457b952d6SSatish Balay 
31557b952d6SSatish Balay   /* wait on sends */
31657b952d6SSatish Balay   if (baij->nsends) {
31757b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
31857b952d6SSatish Balay     CHKPTRQ(send_status);
31957b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
32057b952d6SSatish Balay     PetscFree(send_status);
32157b952d6SSatish Balay   }
32257b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
32357b952d6SSatish Balay 
32457b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
32557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
32657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
32757b952d6SSatish Balay 
32857b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
32957b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
33057b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
33157b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
33257b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
33357b952d6SSatish Balay   }
33457b952d6SSatish Balay 
33557b952d6SSatish Balay   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
33657b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
33757b952d6SSatish Balay   }
33857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
33957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
34057b952d6SSatish Balay 
34157b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
34257b952d6SSatish Balay   return 0;
34357b952d6SSatish Balay }
34457b952d6SSatish Balay 
34557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
34657b952d6SSatish Balay {
34757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
34857b952d6SSatish Balay   int          ierr;
34957b952d6SSatish Balay 
35057b952d6SSatish Balay   if (baij->size == 1) {
35157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
35257b952d6SSatish Balay   }
35357b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
35457b952d6SSatish Balay   return 0;
35557b952d6SSatish Balay }
35657b952d6SSatish Balay 
35757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
35857b952d6SSatish Balay {
35957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
360cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
36157b952d6SSatish Balay   FILE         *fd;
36257b952d6SSatish Balay   ViewerType   vtype;
36357b952d6SSatish Balay 
36457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
36657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
36757b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
36857b952d6SSatish Balay       int nz, nzalloc, mem;
36957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
37057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
37157b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
37257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
37357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
37457b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
37557b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
37657b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
37757b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
37857b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
37957b952d6SSatish Balay       fflush(fd);
38057b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
38157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
38257b952d6SSatish Balay       return 0;
38357b952d6SSatish Balay     }
38457b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
38557b952d6SSatish Balay       return 0;
38657b952d6SSatish Balay     }
38757b952d6SSatish Balay   }
38857b952d6SSatish Balay 
38957b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
39057b952d6SSatish Balay     Draw       draw;
39157b952d6SSatish Balay     PetscTruth isnull;
39257b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
39357b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
39457b952d6SSatish Balay   }
39557b952d6SSatish Balay 
39657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
39757b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
39857b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
39957b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
40057b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
40157b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
40257b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40357b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
40457b952d6SSatish Balay     fflush(fd);
40557b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
40657b952d6SSatish Balay   }
40757b952d6SSatish Balay   else {
40857b952d6SSatish Balay     int size = baij->size;
40957b952d6SSatish Balay     rank = baij->rank;
41057b952d6SSatish Balay     if (size == 1) {
41157b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
41257b952d6SSatish Balay     }
41357b952d6SSatish Balay     else {
41457b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
41557b952d6SSatish Balay       Mat         A;
41657b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
41757b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
41857b952d6SSatish Balay       int         mbs=baij->mbs;
41957b952d6SSatish Balay       Scalar      *a;
42057b952d6SSatish Balay 
42157b952d6SSatish Balay       if (!rank) {
422cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42357b952d6SSatish Balay         CHKERRQ(ierr);
42457b952d6SSatish Balay       }
42557b952d6SSatish Balay       else {
426cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42757b952d6SSatish Balay         CHKERRQ(ierr);
42857b952d6SSatish Balay       }
42957b952d6SSatish Balay       PLogObjectParent(mat,A);
43057b952d6SSatish Balay 
43157b952d6SSatish Balay       /* copy over the A part */
43257b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
43357b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
43457b952d6SSatish Balay       row = baij->rstart;
43557b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
43657b952d6SSatish Balay 
43757b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
43857b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
43957b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
44057b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
44157b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
44257b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
443cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
444cee3aa6bSSatish Balay             col++; a += bs;
44557b952d6SSatish Balay           }
44657b952d6SSatish Balay         }
44757b952d6SSatish Balay       }
44857b952d6SSatish Balay       /* copy over the B part */
44957b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
45057b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
45157b952d6SSatish Balay       row = baij->rstart*bs;
45257b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
45357b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
45457b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45557b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
45657b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
45757b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
458cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
459cee3aa6bSSatish Balay             col++; a += bs;
46057b952d6SSatish Balay           }
46157b952d6SSatish Balay         }
46257b952d6SSatish Balay       }
46357b952d6SSatish Balay       PetscFree(rvals);
46457b952d6SSatish Balay       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
46557b952d6SSatish Balay       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
46657b952d6SSatish Balay       if (!rank) {
46757b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
46857b952d6SSatish Balay       }
46957b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
47057b952d6SSatish Balay     }
47157b952d6SSatish Balay   }
47257b952d6SSatish Balay   return 0;
47357b952d6SSatish Balay }
47457b952d6SSatish Balay 
47557b952d6SSatish Balay 
47657b952d6SSatish Balay 
47757b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
47857b952d6SSatish Balay {
47957b952d6SSatish Balay   Mat         mat = (Mat) obj;
48057b952d6SSatish Balay   int         ierr;
48157b952d6SSatish Balay   ViewerType  vtype;
48257b952d6SSatish Balay 
48357b952d6SSatish Balay   if (!viewer) {
48457b952d6SSatish Balay     viewer = STDOUT_VIEWER_SELF;
48557b952d6SSatish Balay   }
48657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
48757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
48857b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
48957b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
49057b952d6SSatish Balay   }
49157b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
49257b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
49357b952d6SSatish Balay   }
49457b952d6SSatish Balay   return 0;
49557b952d6SSatish Balay }
49657b952d6SSatish Balay 
49779bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
49879bdfe76SSatish Balay {
49979bdfe76SSatish Balay   Mat         mat = (Mat) obj;
50079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
50179bdfe76SSatish Balay   int         ierr;
50279bdfe76SSatish Balay 
50379bdfe76SSatish Balay #if defined(PETSC_LOG)
50479bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
50579bdfe76SSatish Balay #endif
50679bdfe76SSatish Balay 
50779bdfe76SSatish Balay   PetscFree(baij->rowners);
50879bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
50979bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
51079bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
51179bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
51279bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
51379bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
51479bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
51579bdfe76SSatish Balay   PetscFree(baij);
51679bdfe76SSatish Balay   PLogObjectDestroy(mat);
51779bdfe76SSatish Balay   PetscHeaderDestroy(mat);
51879bdfe76SSatish Balay   return 0;
51979bdfe76SSatish Balay }
52079bdfe76SSatish Balay 
521cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
522cee3aa6bSSatish Balay {
523cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
524cee3aa6bSSatish Balay   int        ierr;
525cee3aa6bSSatish Balay 
526cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
527cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
528cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
529cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
530cee3aa6bSSatish Balay   return 0;
531cee3aa6bSSatish Balay }
532cee3aa6bSSatish Balay 
533cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
534cee3aa6bSSatish Balay {
535cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
536cee3aa6bSSatish Balay   int        ierr;
537cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
538cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
539cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
540cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
541cee3aa6bSSatish Balay   return 0;
542cee3aa6bSSatish Balay }
543cee3aa6bSSatish Balay 
544cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
545cee3aa6bSSatish Balay {
546cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
547cee3aa6bSSatish Balay   int        ierr;
548cee3aa6bSSatish Balay 
549cee3aa6bSSatish Balay   /* do nondiagonal part */
550cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
551cee3aa6bSSatish Balay   /* send it on its way */
552cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
553cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
554cee3aa6bSSatish Balay   /* do local part */
555cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
556cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
557cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
558cee3aa6bSSatish Balay   /* but is not perhaps always true. */
559cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
560cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
561cee3aa6bSSatish Balay   return 0;
562cee3aa6bSSatish Balay }
563cee3aa6bSSatish Balay 
564cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
565cee3aa6bSSatish Balay {
566cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
567cee3aa6bSSatish Balay   int        ierr;
568cee3aa6bSSatish Balay 
569cee3aa6bSSatish Balay   /* do nondiagonal part */
570cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
571cee3aa6bSSatish Balay   /* send it on its way */
572cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
573cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
574cee3aa6bSSatish Balay   /* do local part */
575cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
576cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
577cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
578cee3aa6bSSatish Balay   /* but is not perhaps always true. */
579cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
580cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
581cee3aa6bSSatish Balay   return 0;
582cee3aa6bSSatish Balay }
583cee3aa6bSSatish Balay 
584cee3aa6bSSatish Balay /*
585cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
586cee3aa6bSSatish Balay    diagonal block
587cee3aa6bSSatish Balay */
588cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
589cee3aa6bSSatish Balay {
590cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
591cee3aa6bSSatish Balay   if (a->M != a->N)
592cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
593cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
594cee3aa6bSSatish Balay }
595cee3aa6bSSatish Balay 
596cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
597cee3aa6bSSatish Balay {
598cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
599cee3aa6bSSatish Balay   int        ierr;
600cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
601cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
602cee3aa6bSSatish Balay   return 0;
603cee3aa6bSSatish Balay }
60457b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
60557b952d6SSatish Balay {
60657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
60757b952d6SSatish Balay   *m = mat->M; *n = mat->N;
60857b952d6SSatish Balay   return 0;
60957b952d6SSatish Balay }
61057b952d6SSatish Balay 
61157b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
61257b952d6SSatish Balay {
61357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
61457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
61557b952d6SSatish Balay   return 0;
61657b952d6SSatish Balay }
61757b952d6SSatish Balay 
61857b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
61957b952d6SSatish Balay {
62057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62157b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
62257b952d6SSatish Balay   return 0;
62357b952d6SSatish Balay }
62457b952d6SSatish Balay 
625acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
626acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
627acdf5bf4SSatish Balay 
628acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
629acdf5bf4SSatish Balay {
630acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
631acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
632acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
633d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
634d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
635acdf5bf4SSatish Balay 
636acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
637acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
638acdf5bf4SSatish Balay 
639acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
640acdf5bf4SSatish Balay     /*
641acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
642acdf5bf4SSatish Balay     */
643acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
644*bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
645*bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
646acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
647acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
648acdf5bf4SSatish Balay     }
649acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
650acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
651acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
652acdf5bf4SSatish Balay   }
653acdf5bf4SSatish Balay 
654acdf5bf4SSatish Balay 
655d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
656d9d09a02SSatish Balay   lrow = row - brstart;
657acdf5bf4SSatish Balay 
658acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
659acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
660acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
661acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
662acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
663acdf5bf4SSatish Balay   nztot = nzA + nzB;
664acdf5bf4SSatish Balay 
665acdf5bf4SSatish Balay   cmap  = mat->garray;
666acdf5bf4SSatish Balay   if (v  || idx) {
667acdf5bf4SSatish Balay     if (nztot) {
668acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
669acdf5bf4SSatish Balay       int imark = -1;
670acdf5bf4SSatish Balay       if (v) {
671acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
672acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
673d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
674acdf5bf4SSatish Balay           else break;
675acdf5bf4SSatish Balay         }
676acdf5bf4SSatish Balay         imark = i;
677acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
678acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
679acdf5bf4SSatish Balay       }
680acdf5bf4SSatish Balay       if (idx) {
681acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
682acdf5bf4SSatish Balay         if (imark > -1) {
683acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
684*bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
685acdf5bf4SSatish Balay           }
686acdf5bf4SSatish Balay         } else {
687acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
688d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
689d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
690acdf5bf4SSatish Balay             else break;
691acdf5bf4SSatish Balay           }
692acdf5bf4SSatish Balay           imark = i;
693acdf5bf4SSatish Balay         }
694d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
695d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
696acdf5bf4SSatish Balay       }
697acdf5bf4SSatish Balay     }
698acdf5bf4SSatish Balay     else {*idx = 0; *v=0;}
699acdf5bf4SSatish Balay   }
700acdf5bf4SSatish Balay   *nz = nztot;
701acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
702acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
703acdf5bf4SSatish Balay   return 0;
704acdf5bf4SSatish Balay }
705acdf5bf4SSatish Balay 
706acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
707acdf5bf4SSatish Balay {
708acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
709acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
710acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
711acdf5bf4SSatish Balay   }
712acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
713acdf5bf4SSatish Balay   return 0;
714acdf5bf4SSatish Balay }
715acdf5bf4SSatish Balay 
71679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
71779bdfe76SSatish Balay static struct _MatOps MatOps = {
718*bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
719cee3aa6bSSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
72079bdfe76SSatish Balay   0,0,0,0,
72179bdfe76SSatish Balay   0,0,0,0,
722acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
72357b952d6SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
72457b952d6SSatish Balay   0,0,MatGetReordering_MPIBAIJ,0,
72557b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
72657b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
72779bdfe76SSatish Balay   0,0,0,0,
72879bdfe76SSatish Balay   0,0,0,0,
72979bdfe76SSatish Balay   0,0,0,0,
730acdf5bf4SSatish Balay   0,MatGetValues_MPIBAIJ,0,0,
731cee3aa6bSSatish Balay   MatScale_MPIBAIJ,0,0};
73279bdfe76SSatish Balay 
73379bdfe76SSatish Balay 
73479bdfe76SSatish Balay /*@C
73579bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
73679bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
73779bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
73879bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
73979bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
74079bdfe76SSatish Balay 
74179bdfe76SSatish Balay    Input Parameters:
74279bdfe76SSatish Balay .  comm - MPI communicator
74379bdfe76SSatish Balay .  bs   - size of blockk
74479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
74579bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
74679bdfe76SSatish Balay            if N is given)
74779bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
74879bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
74979bdfe76SSatish Balay            if n is given)
75079bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
75179bdfe76SSatish Balay            submatrix  (same for all local rows)
75279bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
75379bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
75479bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
75579bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
75679bdfe76SSatish Balay            submatrix (same for all local rows).
75779bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
75879bdfe76SSatish Balay            submatrix or null (possibly different for each row).
75979bdfe76SSatish Balay 
76079bdfe76SSatish Balay    Output Parameter:
76179bdfe76SSatish Balay .  A - the matrix
76279bdfe76SSatish Balay 
76379bdfe76SSatish Balay    Notes:
76479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
76579bdfe76SSatish Balay    (possibly both).
76679bdfe76SSatish Balay 
76779bdfe76SSatish Balay    Storage Information:
76879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
76979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
77079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
77179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
77279bdfe76SSatish Balay 
77379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
77479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
77579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
77679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
77779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
77879bdfe76SSatish Balay 
77979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
78079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
78179bdfe76SSatish Balay 
78279bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
78379bdfe76SSatish Balay $         -------------------
78479bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
78579bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
78679bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
78779bdfe76SSatish Balay $         -------------------
78879bdfe76SSatish Balay $
78979bdfe76SSatish Balay 
79079bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
79179bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
79279bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
79357b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
79479bdfe76SSatish Balay 
79579bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
79679bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
79779bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
79879bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
79979bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
80079bdfe76SSatish Balay 
80179bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
80279bdfe76SSatish Balay 
80379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
80479bdfe76SSatish Balay @*/
80579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
80679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
80779bdfe76SSatish Balay {
80879bdfe76SSatish Balay   Mat          B;
80979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
810cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
81179bdfe76SSatish Balay 
81279bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
81379bdfe76SSatish Balay   *A = 0;
81479bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
81579bdfe76SSatish Balay   PLogObjectCreate(B);
81679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
81779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
81879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
81979bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
82079bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
82157b952d6SSatish Balay 
82279bdfe76SSatish Balay   B->factor     = 0;
82379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
82479bdfe76SSatish Balay 
82579bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
82679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
82779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
82879bdfe76SSatish Balay 
82979bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
83057b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
831cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
832cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
833cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
834cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
83579bdfe76SSatish Balay 
83679bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
83779bdfe76SSatish Balay     work[0] = m; work[1] = n;
83879bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
83979bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
84079bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
84179bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
84279bdfe76SSatish Balay   }
84379bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
84479bdfe76SSatish Balay     Mbs = M/bs;
84579bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
84679bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
84779bdfe76SSatish Balay     m   = mbs*bs;
84879bdfe76SSatish Balay   }
84979bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
85079bdfe76SSatish Balay     Nbs = N/bs;
85179bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
85279bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
85379bdfe76SSatish Balay     n   = nbs*bs;
85479bdfe76SSatish Balay   }
855cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
85679bdfe76SSatish Balay 
85779bdfe76SSatish Balay   b->m = m; B->m = m;
85879bdfe76SSatish Balay   b->n = n; B->n = n;
85979bdfe76SSatish Balay   b->N = N; B->N = N;
86079bdfe76SSatish Balay   b->M = M; B->M = M;
86179bdfe76SSatish Balay   b->bs  = bs;
86279bdfe76SSatish Balay   b->bs2 = bs*bs;
86379bdfe76SSatish Balay   b->mbs = mbs;
86479bdfe76SSatish Balay   b->nbs = nbs;
86579bdfe76SSatish Balay   b->Mbs = Mbs;
86679bdfe76SSatish Balay   b->Nbs = Nbs;
86779bdfe76SSatish Balay 
86879bdfe76SSatish Balay   /* build local table of row and column ownerships */
86979bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
87079bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
87179bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
87279bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
87379bdfe76SSatish Balay   b->rowners[0] = 0;
87479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
87579bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
87679bdfe76SSatish Balay   }
87779bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
87879bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
87979bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
88079bdfe76SSatish Balay   b->cowners[0] = 0;
88179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
88279bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
88379bdfe76SSatish Balay   }
88479bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
88579bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
88679bdfe76SSatish Balay 
88779bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
88879bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
88979bdfe76SSatish Balay   PLogObjectParent(B,b->A);
89079bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
89179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
89279bdfe76SSatish Balay   PLogObjectParent(B,b->B);
89379bdfe76SSatish Balay 
89479bdfe76SSatish Balay   /* build cache for off array entries formed */
89579bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
89679bdfe76SSatish Balay   b->colmap      = 0;
89779bdfe76SSatish Balay   b->garray      = 0;
89879bdfe76SSatish Balay   b->roworiented = 1;
89979bdfe76SSatish Balay 
90079bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
90179bdfe76SSatish Balay   b->lvec      = 0;
90279bdfe76SSatish Balay   b->Mvctx     = 0;
90379bdfe76SSatish Balay 
90479bdfe76SSatish Balay   /* stuff for MatGetRow() */
90579bdfe76SSatish Balay   b->rowindices   = 0;
90679bdfe76SSatish Balay   b->rowvalues    = 0;
90779bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
90879bdfe76SSatish Balay 
90979bdfe76SSatish Balay   *A = B;
91079bdfe76SSatish Balay   return 0;
91179bdfe76SSatish Balay }
91257b952d6SSatish Balay 
91357b952d6SSatish Balay #include "sys.h"
91457b952d6SSatish Balay 
91557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
91657b952d6SSatish Balay {
91757b952d6SSatish Balay   Mat          A;
91857b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
91957b952d6SSatish Balay   Scalar       *vals,*buf;
92057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
92157b952d6SSatish Balay   MPI_Status   status;
922cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
92357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
92457b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
92557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
92657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
92757b952d6SSatish Balay 
92857b952d6SSatish Balay 
92957b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
93057b952d6SSatish Balay   bs2  = bs*bs;
93157b952d6SSatish Balay 
93257b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
93357b952d6SSatish Balay   if (!rank) {
93457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
936cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
93757b952d6SSatish Balay   }
93857b952d6SSatish Balay 
93957b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
94057b952d6SSatish Balay   M = header[1]; N = header[2];
94157b952d6SSatish Balay 
94257b952d6SSatish Balay 
94357b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
94457b952d6SSatish Balay 
94557b952d6SSatish Balay   /*
94657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
94757b952d6SSatish Balay      divisible by the blocksize
94857b952d6SSatish Balay   */
94957b952d6SSatish Balay   Mbs        = M/bs;
95057b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
95157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
95257b952d6SSatish Balay   else                  Mbs++;
95357b952d6SSatish Balay   if (extra_rows &&!rank) {
95457b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
95557b952d6SSatish Balay   }
95657b952d6SSatish Balay   /* determine ownership of all rows */
95757b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
95857b952d6SSatish Balay   m   = mbs * bs;
959cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
960cee3aa6bSSatish Balay   browners = rowners + size + 1;
96157b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
96257b952d6SSatish Balay   rowners[0] = 0;
963cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
964cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
96557b952d6SSatish Balay   rstart = rowners[rank];
96657b952d6SSatish Balay   rend   = rowners[rank+1];
96757b952d6SSatish Balay 
96857b952d6SSatish Balay   /* distribute row lengths to all processors */
96957b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
97057b952d6SSatish Balay   if (!rank) {
97157b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
97257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
97357b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
97457b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
975cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
976cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
97757b952d6SSatish Balay     PetscFree(sndcounts);
97857b952d6SSatish Balay   }
97957b952d6SSatish Balay   else {
98057b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
98157b952d6SSatish Balay   }
98257b952d6SSatish Balay 
98357b952d6SSatish Balay   if (!rank) {
98457b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
98557b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
98657b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
98757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
98857b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
98957b952d6SSatish Balay         procsnz[i] += rowlengths[j];
99057b952d6SSatish Balay       }
99157b952d6SSatish Balay     }
99257b952d6SSatish Balay     PetscFree(rowlengths);
99357b952d6SSatish Balay 
99457b952d6SSatish Balay     /* determine max buffer needed and allocate it */
99557b952d6SSatish Balay     maxnz = 0;
99657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
99757b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
99857b952d6SSatish Balay     }
99957b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
100057b952d6SSatish Balay 
100157b952d6SSatish Balay     /* read in my part of the matrix column indices  */
100257b952d6SSatish Balay     nz = procsnz[0];
100357b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
100457b952d6SSatish Balay     mycols = ibuf;
1005cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
100657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1007cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1008cee3aa6bSSatish Balay 
100957b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
101057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
101157b952d6SSatish Balay       nz = procsnz[i];
101257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
101357b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
101457b952d6SSatish Balay     }
101557b952d6SSatish Balay     /* read in the stuff for the last proc */
101657b952d6SSatish Balay     if ( size != 1 ) {
101757b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
101857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
101957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1020cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
102157b952d6SSatish Balay     }
102257b952d6SSatish Balay     PetscFree(cols);
102357b952d6SSatish Balay   }
102457b952d6SSatish Balay   else {
102557b952d6SSatish Balay     /* determine buffer space needed for message */
102657b952d6SSatish Balay     nz = 0;
102757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
102857b952d6SSatish Balay       nz += locrowlens[i];
102957b952d6SSatish Balay     }
103057b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
103157b952d6SSatish Balay     mycols = ibuf;
103257b952d6SSatish Balay     /* receive message of column indices*/
103357b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
103457b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1035cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
103657b952d6SSatish Balay   }
103757b952d6SSatish Balay 
103857b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1039cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1040cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
104157b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1042cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
104357b952d6SSatish Balay   masked1 = mask    + Mbs;
104457b952d6SSatish Balay   masked2 = masked1 + Mbs;
104557b952d6SSatish Balay   rowcount = 0; nzcount = 0;
104657b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
104757b952d6SSatish Balay     dcount  = 0;
104857b952d6SSatish Balay     odcount = 0;
104957b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
105057b952d6SSatish Balay       kmax = locrowlens[rowcount];
105157b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
105257b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
105357b952d6SSatish Balay         if (!mask[tmp]) {
105457b952d6SSatish Balay           mask[tmp] = 1;
105557b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
105657b952d6SSatish Balay           else masked1[dcount++] = tmp;
105757b952d6SSatish Balay         }
105857b952d6SSatish Balay       }
105957b952d6SSatish Balay       rowcount++;
106057b952d6SSatish Balay     }
1061cee3aa6bSSatish Balay 
106257b952d6SSatish Balay     dlens[i]  = dcount;
106357b952d6SSatish Balay     odlens[i] = odcount;
1064cee3aa6bSSatish Balay 
106557b952d6SSatish Balay     /* zero out the mask elements we set */
106657b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
106757b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
106857b952d6SSatish Balay   }
1069cee3aa6bSSatish Balay 
107057b952d6SSatish Balay   /* create our matrix */
107157b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
107257b952d6SSatish Balay   A = *newmat;
107357b952d6SSatish Balay   MatSetOption(A,COLUMNS_SORTED);
107457b952d6SSatish Balay 
107557b952d6SSatish Balay   if (!rank) {
107657b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
107757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
107857b952d6SSatish Balay     nz = procsnz[0];
107957b952d6SSatish Balay     vals = buf;
1080cee3aa6bSSatish Balay     mycols = ibuf;
1081cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
108257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1083cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
108457b952d6SSatish Balay     /* insert into matrix */
108557b952d6SSatish Balay     jj      = rstart*bs;
108657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
108757b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
108857b952d6SSatish Balay       mycols += locrowlens[i];
108957b952d6SSatish Balay       vals   += locrowlens[i];
109057b952d6SSatish Balay       jj++;
109157b952d6SSatish Balay     }
109257b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
109357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
109457b952d6SSatish Balay       nz = procsnz[i];
109557b952d6SSatish Balay       vals = buf;
109657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
109757b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
109857b952d6SSatish Balay     }
109957b952d6SSatish Balay     /* the last proc */
110057b952d6SSatish Balay     if ( size != 1 ){
110157b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1102cee3aa6bSSatish Balay       vals = buf;
110357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
110457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1105cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
110657b952d6SSatish Balay     }
110757b952d6SSatish Balay     PetscFree(procsnz);
110857b952d6SSatish Balay   }
110957b952d6SSatish Balay   else {
111057b952d6SSatish Balay     /* receive numeric values */
111157b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
111257b952d6SSatish Balay 
111357b952d6SSatish Balay     /* receive message of values*/
111457b952d6SSatish Balay     vals = buf;
1115cee3aa6bSSatish Balay     mycols = ibuf;
111657b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
111757b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1118cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
111957b952d6SSatish Balay 
112057b952d6SSatish Balay     /* insert into matrix */
112157b952d6SSatish Balay     jj      = rstart*bs;
1122cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
112357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
112457b952d6SSatish Balay       mycols += locrowlens[i];
112557b952d6SSatish Balay       vals   += locrowlens[i];
112657b952d6SSatish Balay       jj++;
112757b952d6SSatish Balay     }
112857b952d6SSatish Balay   }
112957b952d6SSatish Balay   PetscFree(locrowlens);
113057b952d6SSatish Balay   PetscFree(buf);
113157b952d6SSatish Balay   PetscFree(ibuf);
113257b952d6SSatish Balay   PetscFree(rowners);
113357b952d6SSatish Balay   PetscFree(dlens);
1134cee3aa6bSSatish Balay   PetscFree(mask);
113557b952d6SSatish Balay   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
113657b952d6SSatish Balay   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
113757b952d6SSatish Balay   return 0;
113857b952d6SSatish Balay }
113957b952d6SSatish Balay 
114057b952d6SSatish Balay 
1141