xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 58667388dc052d1a30bc35acfb34c66b1cc7684e)
179bdfe76SSatish Balay #ifndef lint
2*58667388SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.13 1996/07/11 04:06:08 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1557b952d6SSatish Balay 
1657b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
2157b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2257b952d6SSatish Balay {
2357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2457b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2557b952d6SSatish Balay   int        nbs = B->nbs,i;
2657b952d6SSatish Balay 
2757b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
2857b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
2957b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
3057b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
3157b952d6SSatish Balay   return 0;
3257b952d6SSatish Balay }
3357b952d6SSatish Balay 
3457b952d6SSatish Balay 
35acdf5bf4SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
3657b952d6SSatish Balay {
3757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3857b952d6SSatish Balay   int         ierr;
3957b952d6SSatish Balay   if (baij->size == 1) {
4057b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
4157b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
4557b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4657b952d6SSatish Balay {
4757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4857b952d6SSatish Balay   Scalar      value;
4957b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
5057b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
5157b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
5257b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
5357b952d6SSatish Balay 
5457b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
5557b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
5657b952d6SSatish Balay   }
5757b952d6SSatish Balay   baij->insertmode = addv;
5857b952d6SSatish Balay   rstart_orig = rstart*bs;
5957b952d6SSatish Balay   rend_orig   = rend*bs;
6057b952d6SSatish Balay   cstart_orig = cstart*bs;
6157b952d6SSatish Balay   cend_orig   = cend*bs;
6257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
6357b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
6457b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
6557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
6657b952d6SSatish Balay       row = im[i] - rstart_orig;
6757b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
6857b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
6957b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
7057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
7157b952d6SSatish Balay           col = in[j] - cstart_orig;
7257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
7357b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
7457b952d6SSatish Balay         }
7557b952d6SSatish Balay         else {
7657b952d6SSatish Balay           if (mat->was_assembled) {
7757b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
78*58667388SSatish Balay             col = baij->colmap[in[j]/bs] +in[j]%bs;
7957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
8057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
8157b952d6SSatish Balay               col =  in[j];
8257b952d6SSatish Balay             }
8357b952d6SSatish Balay           }
8457b952d6SSatish Balay           else col = in[j];
8557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8657b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8757b952d6SSatish Balay         }
8857b952d6SSatish Balay       }
8957b952d6SSatish Balay     }
9057b952d6SSatish Balay     else {
9157b952d6SSatish Balay       if (roworiented) {
9257b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
9357b952d6SSatish Balay       }
9457b952d6SSatish Balay       else {
9557b952d6SSatish Balay         row = im[i];
9657b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
9757b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
9857b952d6SSatish Balay         }
9957b952d6SSatish Balay       }
10057b952d6SSatish Balay     }
10157b952d6SSatish Balay   }
10257b952d6SSatish Balay   return 0;
10357b952d6SSatish Balay }
10457b952d6SSatish Balay 
105d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
106d6de1c52SSatish Balay {
107d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
108d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
109d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
110d6de1c52SSatish Balay 
111d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
112d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
113d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
114d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
115d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
116d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
117d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
118d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
119d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
120d6de1c52SSatish Balay           col = idxn[j] - bscstart;
121d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
122d6de1c52SSatish Balay         }
123d6de1c52SSatish Balay         else {
124acdf5bf4SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
125d9d09a02SSatish Balay           if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0;
126d9d09a02SSatish Balay           else {
127d9d09a02SSatish Balay             col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs;
128d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
129d6de1c52SSatish Balay           }
130d6de1c52SSatish Balay         }
131d6de1c52SSatish Balay       }
132d9d09a02SSatish Balay     }
133d6de1c52SSatish Balay     else {
134d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
135d6de1c52SSatish Balay     }
136d6de1c52SSatish Balay   }
137d6de1c52SSatish Balay   return 0;
138d6de1c52SSatish Balay }
139d6de1c52SSatish Balay 
140d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
141d6de1c52SSatish Balay {
142d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
143d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
144acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
145d6de1c52SSatish Balay   double     sum = 0.0;
146d6de1c52SSatish Balay   Scalar     *v;
147d6de1c52SSatish Balay 
148d6de1c52SSatish Balay   if (baij->size == 1) {
149d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
150d6de1c52SSatish Balay   } else {
151d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
152d6de1c52SSatish Balay       v = amat->a;
153d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
154d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
155d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
156d6de1c52SSatish Balay #else
157d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
158d6de1c52SSatish Balay #endif
159d6de1c52SSatish Balay       }
160d6de1c52SSatish Balay       v = bmat->a;
161d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
162d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
163d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
164d6de1c52SSatish Balay #else
165d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
166d6de1c52SSatish Balay #endif
167d6de1c52SSatish Balay       }
168d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
169d6de1c52SSatish Balay       *norm = sqrt(*norm);
170d6de1c52SSatish Balay     }
171acdf5bf4SSatish Balay     else
172acdf5bf4SSatish Balay       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
173d6de1c52SSatish Balay   }
174d6de1c52SSatish Balay   return 0;
175d6de1c52SSatish Balay }
17657b952d6SSatish Balay 
17757b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
17857b952d6SSatish Balay {
17957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
18057b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
18157b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
18257b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
18357b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
18457b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
18557b952d6SSatish Balay   InsertMode  addv;
18657b952d6SSatish Balay   Scalar      *rvalues,*svalues;
18757b952d6SSatish Balay 
18857b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
18957b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
19057b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
19157b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
19257b952d6SSatish Balay   }
19357b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
19457b952d6SSatish Balay 
19557b952d6SSatish Balay   /*  first count number of contributors to each processor */
19657b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
19757b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
19857b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
19957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
20057b952d6SSatish Balay     idx = baij->stash.idx[i];
20157b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
20257b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
20357b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
20457b952d6SSatish Balay       }
20557b952d6SSatish Balay     }
20657b952d6SSatish Balay   }
20757b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
20857b952d6SSatish Balay 
20957b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
21057b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
21157b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
21257b952d6SSatish Balay   nreceives = work[rank];
21357b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
21457b952d6SSatish Balay   nmax = work[rank];
21557b952d6SSatish Balay   PetscFree(work);
21657b952d6SSatish Balay 
21757b952d6SSatish Balay   /* post receives:
21857b952d6SSatish Balay        1) each message will consist of ordered pairs
21957b952d6SSatish Balay      (global index,value) we store the global index as a double
22057b952d6SSatish Balay      to simplify the message passing.
22157b952d6SSatish Balay        2) since we don't know how long each individual message is we
22257b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
22357b952d6SSatish Balay      this is a lot of wasted space.
22457b952d6SSatish Balay 
22557b952d6SSatish Balay 
22657b952d6SSatish Balay        This could be done better.
22757b952d6SSatish Balay   */
22857b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
22957b952d6SSatish Balay   CHKPTRQ(rvalues);
23057b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
23157b952d6SSatish Balay   CHKPTRQ(recv_waits);
23257b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
23357b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
23457b952d6SSatish Balay               comm,recv_waits+i);
23557b952d6SSatish Balay   }
23657b952d6SSatish Balay 
23757b952d6SSatish Balay   /* do sends:
23857b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
23957b952d6SSatish Balay          the ith processor
24057b952d6SSatish Balay   */
24157b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
24257b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
24357b952d6SSatish Balay   CHKPTRQ(send_waits);
24457b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
24557b952d6SSatish Balay   starts[0] = 0;
24657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
24757b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
24857b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
24957b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
25057b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
25157b952d6SSatish Balay   }
25257b952d6SSatish Balay   PetscFree(owner);
25357b952d6SSatish Balay   starts[0] = 0;
25457b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
25557b952d6SSatish Balay   count = 0;
25657b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
25757b952d6SSatish Balay     if (procs[i]) {
25857b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
25957b952d6SSatish Balay                 comm,send_waits+count++);
26057b952d6SSatish Balay     }
26157b952d6SSatish Balay   }
26257b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
26357b952d6SSatish Balay 
26457b952d6SSatish Balay   /* Free cache space */
26557b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
26657b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
26757b952d6SSatish Balay 
26857b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
26957b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
27057b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
27157b952d6SSatish Balay   baij->rmax       = nmax;
27257b952d6SSatish Balay 
27357b952d6SSatish Balay   return 0;
27457b952d6SSatish Balay }
27557b952d6SSatish Balay 
27657b952d6SSatish Balay 
27757b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
27857b952d6SSatish Balay {
27957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
28057b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
28157b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
28257b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
28357b952d6SSatish Balay   Scalar      *values,val;
28457b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
28557b952d6SSatish Balay 
28657b952d6SSatish Balay   /*  wait on receives */
28757b952d6SSatish Balay   while (count) {
28857b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
28957b952d6SSatish Balay     /* unpack receives into our local space */
29057b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
29157b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
29257b952d6SSatish Balay     n = n/3;
29357b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
29457b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
29557b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
29657b952d6SSatish Balay       val = values[3*i+2];
29757b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
29857b952d6SSatish Balay         col -= baij->cstart*bs;
29957b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
30057b952d6SSatish Balay       }
30157b952d6SSatish Balay       else {
30257b952d6SSatish Balay         if (mat->was_assembled) {
30357b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
30457b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
30557b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
30657b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
30757b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
30857b952d6SSatish Balay           }
30957b952d6SSatish Balay         }
31057b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
31157b952d6SSatish Balay       }
31257b952d6SSatish Balay     }
31357b952d6SSatish Balay     count--;
31457b952d6SSatish Balay   }
31557b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
31657b952d6SSatish Balay 
31757b952d6SSatish Balay   /* wait on sends */
31857b952d6SSatish Balay   if (baij->nsends) {
31957b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
32057b952d6SSatish Balay     CHKPTRQ(send_status);
32157b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
32257b952d6SSatish Balay     PetscFree(send_status);
32357b952d6SSatish Balay   }
32457b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
32557b952d6SSatish Balay 
32657b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
32757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
32857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
32957b952d6SSatish Balay 
33057b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
33157b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
33257b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
33357b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
33457b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
33557b952d6SSatish Balay   }
33657b952d6SSatish Balay 
3376d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
33857b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
33957b952d6SSatish Balay   }
34057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
34157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
34257b952d6SSatish Balay 
34357b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
34457b952d6SSatish Balay   return 0;
34557b952d6SSatish Balay }
34657b952d6SSatish Balay 
34757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
34857b952d6SSatish Balay {
34957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
35057b952d6SSatish Balay   int          ierr;
35157b952d6SSatish Balay 
35257b952d6SSatish Balay   if (baij->size == 1) {
35357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
35457b952d6SSatish Balay   }
35557b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
35657b952d6SSatish Balay   return 0;
35757b952d6SSatish Balay }
35857b952d6SSatish Balay 
35957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
36057b952d6SSatish Balay {
36157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
362cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
36357b952d6SSatish Balay   FILE         *fd;
36457b952d6SSatish Balay   ViewerType   vtype;
36557b952d6SSatish Balay 
36657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
36757b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
36857b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
36957b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
37057b952d6SSatish Balay       int nz, nzalloc, mem;
37157b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
37257b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
37357b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
37457b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
37557b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
37657b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
37757b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
37857b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
37957b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
38057b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
38157b952d6SSatish Balay       fflush(fd);
38257b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
38357b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
38457b952d6SSatish Balay       return 0;
38557b952d6SSatish Balay     }
38657b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
38757b952d6SSatish Balay       return 0;
38857b952d6SSatish Balay     }
38957b952d6SSatish Balay   }
39057b952d6SSatish Balay 
39157b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
39257b952d6SSatish Balay     Draw       draw;
39357b952d6SSatish Balay     PetscTruth isnull;
39457b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
39557b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
39657b952d6SSatish Balay   }
39757b952d6SSatish Balay 
39857b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
39957b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
40057b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
40157b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
40257b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
40357b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
40457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
40557b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
40657b952d6SSatish Balay     fflush(fd);
40757b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
40857b952d6SSatish Balay   }
40957b952d6SSatish Balay   else {
41057b952d6SSatish Balay     int size = baij->size;
41157b952d6SSatish Balay     rank = baij->rank;
41257b952d6SSatish Balay     if (size == 1) {
41357b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
41457b952d6SSatish Balay     }
41557b952d6SSatish Balay     else {
41657b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
41757b952d6SSatish Balay       Mat         A;
41857b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
41957b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
42057b952d6SSatish Balay       int         mbs=baij->mbs;
42157b952d6SSatish Balay       Scalar      *a;
42257b952d6SSatish Balay 
42357b952d6SSatish Balay       if (!rank) {
424cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42557b952d6SSatish Balay         CHKERRQ(ierr);
42657b952d6SSatish Balay       }
42757b952d6SSatish Balay       else {
428cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
42957b952d6SSatish Balay         CHKERRQ(ierr);
43057b952d6SSatish Balay       }
43157b952d6SSatish Balay       PLogObjectParent(mat,A);
43257b952d6SSatish Balay 
43357b952d6SSatish Balay       /* copy over the A part */
43457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
43557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
43657b952d6SSatish Balay       row = baij->rstart;
43757b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
43857b952d6SSatish Balay 
43957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
44057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
44157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
44257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
44357b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
44457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
445cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
446cee3aa6bSSatish Balay             col++; a += bs;
44757b952d6SSatish Balay           }
44857b952d6SSatish Balay         }
44957b952d6SSatish Balay       }
45057b952d6SSatish Balay       /* copy over the B part */
45157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
45257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
45357b952d6SSatish Balay       row = baij->rstart*bs;
45457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
45557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
45657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
45757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
45857b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
45957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
460cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
461cee3aa6bSSatish Balay             col++; a += bs;
46257b952d6SSatish Balay           }
46357b952d6SSatish Balay         }
46457b952d6SSatish Balay       }
46557b952d6SSatish Balay       PetscFree(rvals);
4666d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
4676d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
46857b952d6SSatish Balay       if (!rank) {
46957b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
47057b952d6SSatish Balay       }
47157b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
47257b952d6SSatish Balay     }
47357b952d6SSatish Balay   }
47457b952d6SSatish Balay   return 0;
47557b952d6SSatish Balay }
47657b952d6SSatish Balay 
47757b952d6SSatish Balay 
47857b952d6SSatish Balay 
47957b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
48057b952d6SSatish Balay {
48157b952d6SSatish Balay   Mat         mat = (Mat) obj;
48257b952d6SSatish Balay   int         ierr;
48357b952d6SSatish Balay   ViewerType  vtype;
48457b952d6SSatish Balay 
48557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
48657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
48757b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
48857b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
48957b952d6SSatish Balay   }
49057b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
49157b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
49257b952d6SSatish Balay   }
49357b952d6SSatish Balay   return 0;
49457b952d6SSatish Balay }
49557b952d6SSatish Balay 
49679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
49779bdfe76SSatish Balay {
49879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
49979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
50079bdfe76SSatish Balay   int         ierr;
50179bdfe76SSatish Balay 
50279bdfe76SSatish Balay #if defined(PETSC_LOG)
50379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
50479bdfe76SSatish Balay #endif
50579bdfe76SSatish Balay 
50679bdfe76SSatish Balay   PetscFree(baij->rowners);
50779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
50879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
50979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
51079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
51179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
51279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
51379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
51479bdfe76SSatish Balay   PetscFree(baij);
51579bdfe76SSatish Balay   PLogObjectDestroy(mat);
51679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
51779bdfe76SSatish Balay   return 0;
51879bdfe76SSatish Balay }
51979bdfe76SSatish Balay 
520cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
521cee3aa6bSSatish Balay {
522cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
52347b4a8eaSLois Curfman McInnes   int         ierr, nt;
524cee3aa6bSSatish Balay 
52547b4a8eaSLois Curfman McInnes   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
52647b4a8eaSLois Curfman McInnes   if (nt != a->n) {
52747b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
52847b4a8eaSLois Curfman McInnes   }
52947b4a8eaSLois Curfman McInnes   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
53047b4a8eaSLois Curfman McInnes   if (nt != a->m) {
53147b4a8eaSLois Curfman McInnes     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy");
53247b4a8eaSLois Curfman McInnes   }
533cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
534cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
535cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
536cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
537cee3aa6bSSatish Balay   return 0;
538cee3aa6bSSatish Balay }
539cee3aa6bSSatish Balay 
540cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
541cee3aa6bSSatish Balay {
542cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
543cee3aa6bSSatish Balay   int        ierr;
544cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
545cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
546cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
547cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
548cee3aa6bSSatish Balay   return 0;
549cee3aa6bSSatish Balay }
550cee3aa6bSSatish Balay 
551cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
552cee3aa6bSSatish Balay {
553cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
554cee3aa6bSSatish Balay   int        ierr;
555cee3aa6bSSatish Balay 
556cee3aa6bSSatish Balay   /* do nondiagonal part */
557cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
558cee3aa6bSSatish Balay   /* send it on its way */
559cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
560cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
561cee3aa6bSSatish Balay   /* do local part */
562cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
563cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
564cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
565cee3aa6bSSatish Balay   /* but is not perhaps always true. */
566cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
567cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
568cee3aa6bSSatish Balay   return 0;
569cee3aa6bSSatish Balay }
570cee3aa6bSSatish Balay 
571cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
572cee3aa6bSSatish Balay {
573cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
574cee3aa6bSSatish Balay   int        ierr;
575cee3aa6bSSatish Balay 
576cee3aa6bSSatish Balay   /* do nondiagonal part */
577cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
578cee3aa6bSSatish Balay   /* send it on its way */
579cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
580cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
581cee3aa6bSSatish Balay   /* do local part */
582cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
583cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
584cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
585cee3aa6bSSatish Balay   /* but is not perhaps always true. */
586cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
587cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
588cee3aa6bSSatish Balay   return 0;
589cee3aa6bSSatish Balay }
590cee3aa6bSSatish Balay 
591cee3aa6bSSatish Balay /*
592cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
593cee3aa6bSSatish Balay    diagonal block
594cee3aa6bSSatish Balay */
595cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
596cee3aa6bSSatish Balay {
597cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
598cee3aa6bSSatish Balay   if (a->M != a->N)
599cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
600cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
601cee3aa6bSSatish Balay }
602cee3aa6bSSatish Balay 
603cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
604cee3aa6bSSatish Balay {
605cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
606cee3aa6bSSatish Balay   int        ierr;
607cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
608cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
609cee3aa6bSSatish Balay   return 0;
610cee3aa6bSSatish Balay }
61157b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
61257b952d6SSatish Balay {
61357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
61457b952d6SSatish Balay   *m = mat->M; *n = mat->N;
61557b952d6SSatish Balay   return 0;
61657b952d6SSatish Balay }
61757b952d6SSatish Balay 
61857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
61957b952d6SSatish Balay {
62057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62157b952d6SSatish Balay   *m = mat->m; *n = mat->N;
62257b952d6SSatish Balay   return 0;
62357b952d6SSatish Balay }
62457b952d6SSatish Balay 
62557b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
62657b952d6SSatish Balay {
62757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
62857b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
62957b952d6SSatish Balay   return 0;
63057b952d6SSatish Balay }
63157b952d6SSatish Balay 
632acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
633acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
634acdf5bf4SSatish Balay 
635acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
636acdf5bf4SSatish Balay {
637acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
638acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
639acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
640d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
641d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
642acdf5bf4SSatish Balay 
643acdf5bf4SSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
644acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
645acdf5bf4SSatish Balay 
646acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
647acdf5bf4SSatish Balay     /*
648acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
649acdf5bf4SSatish Balay     */
650acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
651bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
652bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
653acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
654acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
655acdf5bf4SSatish Balay     }
656acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
657acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
658acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
659acdf5bf4SSatish Balay   }
660acdf5bf4SSatish Balay 
661acdf5bf4SSatish Balay 
662d9d09a02SSatish Balay   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
663d9d09a02SSatish Balay   lrow = row - brstart;
664acdf5bf4SSatish Balay 
665acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
666acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
667acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
668acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
669acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
670acdf5bf4SSatish Balay   nztot = nzA + nzB;
671acdf5bf4SSatish Balay 
672acdf5bf4SSatish Balay   cmap  = mat->garray;
673acdf5bf4SSatish Balay   if (v  || idx) {
674acdf5bf4SSatish Balay     if (nztot) {
675acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
676acdf5bf4SSatish Balay       int imark = -1;
677acdf5bf4SSatish Balay       if (v) {
678acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
679acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
680d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
681acdf5bf4SSatish Balay           else break;
682acdf5bf4SSatish Balay         }
683acdf5bf4SSatish Balay         imark = i;
684acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
685acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
686acdf5bf4SSatish Balay       }
687acdf5bf4SSatish Balay       if (idx) {
688acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
689acdf5bf4SSatish Balay         if (imark > -1) {
690acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
691bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
692acdf5bf4SSatish Balay           }
693acdf5bf4SSatish Balay         } else {
694acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
695d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
696d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
697acdf5bf4SSatish Balay             else break;
698acdf5bf4SSatish Balay           }
699acdf5bf4SSatish Balay           imark = i;
700acdf5bf4SSatish Balay         }
701d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
702d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
703acdf5bf4SSatish Balay       }
704acdf5bf4SSatish Balay     }
705d212a18eSSatish Balay     else {
706d212a18eSSatish Balay       if (idx) *idx = 0;
707d212a18eSSatish Balay       if (v)   *v   = 0;
708d212a18eSSatish Balay     }
709acdf5bf4SSatish Balay   }
710acdf5bf4SSatish Balay   *nz = nztot;
711acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
712acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
713acdf5bf4SSatish Balay   return 0;
714acdf5bf4SSatish Balay }
715acdf5bf4SSatish Balay 
716acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
717acdf5bf4SSatish Balay {
718acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
719acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
720acdf5bf4SSatish Balay     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
721acdf5bf4SSatish Balay   }
722acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
723acdf5bf4SSatish Balay   return 0;
724acdf5bf4SSatish Balay }
725acdf5bf4SSatish Balay 
7265a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat, int *bs)
7275a838052SSatish Balay {
7285a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
7295a838052SSatish Balay   *bs = baij->bs;
7305a838052SSatish Balay   return 0;
7315a838052SSatish Balay }
7325a838052SSatish Balay 
733*58667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
734*58667388SSatish Balay {
735*58667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
736*58667388SSatish Balay   int         ierr;
737*58667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
738*58667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
739*58667388SSatish Balay   return 0;
740*58667388SSatish Balay }
741*58667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
742*58667388SSatish Balay {
743*58667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
744*58667388SSatish Balay 
745*58667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
746*58667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
747*58667388SSatish Balay       op == MAT_COLUMNS_SORTED ||
748*58667388SSatish Balay       op == MAT_ROW_ORIENTED) {
749*58667388SSatish Balay         MatSetOption(a->A,op);
750*58667388SSatish Balay         MatSetOption(a->B,op);
751*58667388SSatish Balay   }
752*58667388SSatish Balay   else if (op == MAT_ROWS_SORTED ||
753*58667388SSatish Balay            op == MAT_SYMMETRIC ||
754*58667388SSatish Balay            op == MAT_STRUCTURALLY_SYMMETRIC ||
755*58667388SSatish Balay            op == MAT_YES_NEW_DIAGONALS)
756*58667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
757*58667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
758*58667388SSatish Balay     a->roworiented = 0;
759*58667388SSatish Balay     MatSetOption(a->A,op);
760*58667388SSatish Balay     MatSetOption(a->B,op);
761*58667388SSatish Balay   }
762*58667388SSatish Balay   else if (op == MAT_NO_NEW_DIAGONALS)
763*58667388SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
764*58667388SSatish Balay   else
765*58667388SSatish Balay     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
766*58667388SSatish Balay   return 0;
767*58667388SSatish Balay }
768*58667388SSatish Balay 
76979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
77079bdfe76SSatish Balay static struct _MatOps MatOps = {
771bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
772cee3aa6bSSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
77379bdfe76SSatish Balay   0,0,0,0,
77479bdfe76SSatish Balay   0,0,0,0,
775acdf5bf4SSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
776*58667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
777*58667388SSatish Balay   MatZeroEntries_MPIBAIJ,0,MatGetReordering_MPIBAIJ,0,
77857b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
77957b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
78079bdfe76SSatish Balay   0,0,0,0,
78179bdfe76SSatish Balay   0,0,0,0,
782d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
783d212a18eSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,0,
7845a838052SSatish Balay   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ};
78579bdfe76SSatish Balay 
78679bdfe76SSatish Balay 
78779bdfe76SSatish Balay /*@C
78879bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
78979bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
79079bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
79179bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
79279bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
79379bdfe76SSatish Balay 
79479bdfe76SSatish Balay    Input Parameters:
79579bdfe76SSatish Balay .  comm - MPI communicator
79679bdfe76SSatish Balay .  bs   - size of blockk
79779bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
79892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
79992e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
80092e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
80192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
80292e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
80379bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
80492e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
80579bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
80679bdfe76SSatish Balay            submatrix  (same for all local rows)
80792e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
80892e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
80992e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
81092e8d321SLois Curfman McInnes            it is zero.
81192e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
81279bdfe76SSatish Balay            submatrix (same for all local rows).
81392e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
81492e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
81592e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
81679bdfe76SSatish Balay 
81779bdfe76SSatish Balay    Output Parameter:
81879bdfe76SSatish Balay .  A - the matrix
81979bdfe76SSatish Balay 
82079bdfe76SSatish Balay    Notes:
82179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
82279bdfe76SSatish Balay    (possibly both).
82379bdfe76SSatish Balay 
82479bdfe76SSatish Balay    Storage Information:
82579bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
82679bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
82779bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
82879bdfe76SSatish Balay    local matrix (a rectangular submatrix).
82979bdfe76SSatish Balay 
83079bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
83179bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
83279bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
83379bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
83479bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
83579bdfe76SSatish Balay 
83679bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
83779bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
83879bdfe76SSatish Balay 
83979bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
84079bdfe76SSatish Balay $         -------------------
84179bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
84279bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
84379bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
84479bdfe76SSatish Balay $         -------------------
84579bdfe76SSatish Balay $
84679bdfe76SSatish Balay 
84779bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
84879bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
84979bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
85057b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
85179bdfe76SSatish Balay 
85279bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
85379bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
85479bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
85592e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
85692e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
85792e8d321SLois Curfman McInnes    matrices and the file $(PETSC_DIR)/Performance.
85879bdfe76SSatish Balay 
85992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
86079bdfe76SSatish Balay 
86179bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
86279bdfe76SSatish Balay @*/
86379bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
86479bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
86579bdfe76SSatish Balay {
86679bdfe76SSatish Balay   Mat          B;
86779bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
868cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
86979bdfe76SSatish Balay 
87079bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
87179bdfe76SSatish Balay   *A = 0;
87279bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
87379bdfe76SSatish Balay   PLogObjectCreate(B);
87479bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
87579bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
87679bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
87779bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
87879bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
87957b952d6SSatish Balay 
88079bdfe76SSatish Balay   B->factor     = 0;
88179bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
88279bdfe76SSatish Balay 
88379bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
88479bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
88579bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
88679bdfe76SSatish Balay 
88779bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
88857b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
889cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
890cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
891cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
892cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
89379bdfe76SSatish Balay 
89479bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
89579bdfe76SSatish Balay     work[0] = m; work[1] = n;
89679bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
89779bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
89879bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
89979bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
90079bdfe76SSatish Balay   }
90179bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
90279bdfe76SSatish Balay     Mbs = M/bs;
90379bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
90479bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
90579bdfe76SSatish Balay     m   = mbs*bs;
90679bdfe76SSatish Balay   }
90779bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
90879bdfe76SSatish Balay     Nbs = N/bs;
90979bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
91079bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
91179bdfe76SSatish Balay     n   = nbs*bs;
91279bdfe76SSatish Balay   }
913cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
91479bdfe76SSatish Balay 
91579bdfe76SSatish Balay   b->m = m; B->m = m;
91679bdfe76SSatish Balay   b->n = n; B->n = n;
91779bdfe76SSatish Balay   b->N = N; B->N = N;
91879bdfe76SSatish Balay   b->M = M; B->M = M;
91979bdfe76SSatish Balay   b->bs  = bs;
92079bdfe76SSatish Balay   b->bs2 = bs*bs;
92179bdfe76SSatish Balay   b->mbs = mbs;
92279bdfe76SSatish Balay   b->nbs = nbs;
92379bdfe76SSatish Balay   b->Mbs = Mbs;
92479bdfe76SSatish Balay   b->Nbs = Nbs;
92579bdfe76SSatish Balay 
92679bdfe76SSatish Balay   /* build local table of row and column ownerships */
92779bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
92879bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
92979bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
93079bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
93179bdfe76SSatish Balay   b->rowners[0] = 0;
93279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
93379bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
93479bdfe76SSatish Balay   }
93579bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
93679bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
93779bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
93879bdfe76SSatish Balay   b->cowners[0] = 0;
93979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
94079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
94179bdfe76SSatish Balay   }
94279bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
94379bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
94479bdfe76SSatish Balay 
94579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
94679bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
94779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
94879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
94979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
95079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
95179bdfe76SSatish Balay 
95279bdfe76SSatish Balay   /* build cache for off array entries formed */
95379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
95479bdfe76SSatish Balay   b->colmap      = 0;
95579bdfe76SSatish Balay   b->garray      = 0;
95679bdfe76SSatish Balay   b->roworiented = 1;
95779bdfe76SSatish Balay 
95879bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
95979bdfe76SSatish Balay   b->lvec      = 0;
96079bdfe76SSatish Balay   b->Mvctx     = 0;
96179bdfe76SSatish Balay 
96279bdfe76SSatish Balay   /* stuff for MatGetRow() */
96379bdfe76SSatish Balay   b->rowindices   = 0;
96479bdfe76SSatish Balay   b->rowvalues    = 0;
96579bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
96679bdfe76SSatish Balay 
96779bdfe76SSatish Balay   *A = B;
96879bdfe76SSatish Balay   return 0;
96979bdfe76SSatish Balay }
97057b952d6SSatish Balay 
97157b952d6SSatish Balay #include "sys.h"
97257b952d6SSatish Balay 
97357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
97457b952d6SSatish Balay {
97557b952d6SSatish Balay   Mat          A;
97657b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
97757b952d6SSatish Balay   Scalar       *vals,*buf;
97857b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
97957b952d6SSatish Balay   MPI_Status   status;
980cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
98157b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
98257b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
98357b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
98457b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
98557b952d6SSatish Balay 
98657b952d6SSatish Balay 
98757b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
98857b952d6SSatish Balay   bs2  = bs*bs;
98957b952d6SSatish Balay 
99057b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
99157b952d6SSatish Balay   if (!rank) {
99257b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
99357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
994cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
99557b952d6SSatish Balay   }
99657b952d6SSatish Balay 
99757b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
99857b952d6SSatish Balay   M = header[1]; N = header[2];
99957b952d6SSatish Balay 
100057b952d6SSatish Balay 
100157b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
100257b952d6SSatish Balay 
100357b952d6SSatish Balay   /*
100457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
100557b952d6SSatish Balay      divisible by the blocksize
100657b952d6SSatish Balay   */
100757b952d6SSatish Balay   Mbs        = M/bs;
100857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
100957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
101057b952d6SSatish Balay   else                  Mbs++;
101157b952d6SSatish Balay   if (extra_rows &&!rank) {
101257b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
101357b952d6SSatish Balay   }
101457b952d6SSatish Balay   /* determine ownership of all rows */
101557b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
101657b952d6SSatish Balay   m   = mbs * bs;
1017cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1018cee3aa6bSSatish Balay   browners = rowners + size + 1;
101957b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
102057b952d6SSatish Balay   rowners[0] = 0;
1021cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1022cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
102357b952d6SSatish Balay   rstart = rowners[rank];
102457b952d6SSatish Balay   rend   = rowners[rank+1];
102557b952d6SSatish Balay 
102657b952d6SSatish Balay   /* distribute row lengths to all processors */
102757b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
102857b952d6SSatish Balay   if (!rank) {
102957b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
103057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
103157b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
103257b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1033cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1034cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
103557b952d6SSatish Balay     PetscFree(sndcounts);
103657b952d6SSatish Balay   }
103757b952d6SSatish Balay   else {
103857b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
103957b952d6SSatish Balay   }
104057b952d6SSatish Balay 
104157b952d6SSatish Balay   if (!rank) {
104257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
104357b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
104457b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
104557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
104657b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
104757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
104857b952d6SSatish Balay       }
104957b952d6SSatish Balay     }
105057b952d6SSatish Balay     PetscFree(rowlengths);
105157b952d6SSatish Balay 
105257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
105357b952d6SSatish Balay     maxnz = 0;
105457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
105557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
105657b952d6SSatish Balay     }
105757b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
105857b952d6SSatish Balay 
105957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
106057b952d6SSatish Balay     nz = procsnz[0];
106157b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
106257b952d6SSatish Balay     mycols = ibuf;
1063cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
106457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1065cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1066cee3aa6bSSatish Balay 
106757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
106857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
106957b952d6SSatish Balay       nz = procsnz[i];
107057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
107157b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
107257b952d6SSatish Balay     }
107357b952d6SSatish Balay     /* read in the stuff for the last proc */
107457b952d6SSatish Balay     if ( size != 1 ) {
107557b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
107657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
107757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1078cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
107957b952d6SSatish Balay     }
108057b952d6SSatish Balay     PetscFree(cols);
108157b952d6SSatish Balay   }
108257b952d6SSatish Balay   else {
108357b952d6SSatish Balay     /* determine buffer space needed for message */
108457b952d6SSatish Balay     nz = 0;
108557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
108657b952d6SSatish Balay       nz += locrowlens[i];
108757b952d6SSatish Balay     }
108857b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
108957b952d6SSatish Balay     mycols = ibuf;
109057b952d6SSatish Balay     /* receive message of column indices*/
109157b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
109257b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1093cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
109457b952d6SSatish Balay   }
109557b952d6SSatish Balay 
109657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1097cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1098cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
109957b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1100cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
110157b952d6SSatish Balay   masked1 = mask    + Mbs;
110257b952d6SSatish Balay   masked2 = masked1 + Mbs;
110357b952d6SSatish Balay   rowcount = 0; nzcount = 0;
110457b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
110557b952d6SSatish Balay     dcount  = 0;
110657b952d6SSatish Balay     odcount = 0;
110757b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
110857b952d6SSatish Balay       kmax = locrowlens[rowcount];
110957b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
111057b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
111157b952d6SSatish Balay         if (!mask[tmp]) {
111257b952d6SSatish Balay           mask[tmp] = 1;
111357b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
111457b952d6SSatish Balay           else masked1[dcount++] = tmp;
111557b952d6SSatish Balay         }
111657b952d6SSatish Balay       }
111757b952d6SSatish Balay       rowcount++;
111857b952d6SSatish Balay     }
1119cee3aa6bSSatish Balay 
112057b952d6SSatish Balay     dlens[i]  = dcount;
112157b952d6SSatish Balay     odlens[i] = odcount;
1122cee3aa6bSSatish Balay 
112357b952d6SSatish Balay     /* zero out the mask elements we set */
112457b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
112557b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
112657b952d6SSatish Balay   }
1127cee3aa6bSSatish Balay 
112857b952d6SSatish Balay   /* create our matrix */
112957b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
113057b952d6SSatish Balay   A = *newmat;
11316d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
113257b952d6SSatish Balay 
113357b952d6SSatish Balay   if (!rank) {
113457b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
113557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
113657b952d6SSatish Balay     nz = procsnz[0];
113757b952d6SSatish Balay     vals = buf;
1138cee3aa6bSSatish Balay     mycols = ibuf;
1139cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
114057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1141cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
114257b952d6SSatish Balay     /* insert into matrix */
114357b952d6SSatish Balay     jj      = rstart*bs;
114457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
114557b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
114657b952d6SSatish Balay       mycols += locrowlens[i];
114757b952d6SSatish Balay       vals   += locrowlens[i];
114857b952d6SSatish Balay       jj++;
114957b952d6SSatish Balay     }
115057b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
115157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
115257b952d6SSatish Balay       nz = procsnz[i];
115357b952d6SSatish Balay       vals = buf;
115457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
115557b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
115657b952d6SSatish Balay     }
115757b952d6SSatish Balay     /* the last proc */
115857b952d6SSatish Balay     if ( size != 1 ){
115957b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1160cee3aa6bSSatish Balay       vals = buf;
116157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
116257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1163cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
116457b952d6SSatish Balay     }
116557b952d6SSatish Balay     PetscFree(procsnz);
116657b952d6SSatish Balay   }
116757b952d6SSatish Balay   else {
116857b952d6SSatish Balay     /* receive numeric values */
116957b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
117057b952d6SSatish Balay 
117157b952d6SSatish Balay     /* receive message of values*/
117257b952d6SSatish Balay     vals = buf;
1173cee3aa6bSSatish Balay     mycols = ibuf;
117457b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
117557b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1176cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
117757b952d6SSatish Balay 
117857b952d6SSatish Balay     /* insert into matrix */
117957b952d6SSatish Balay     jj      = rstart*bs;
1180cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
118157b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
118257b952d6SSatish Balay       mycols += locrowlens[i];
118357b952d6SSatish Balay       vals   += locrowlens[i];
118457b952d6SSatish Balay       jj++;
118557b952d6SSatish Balay     }
118657b952d6SSatish Balay   }
118757b952d6SSatish Balay   PetscFree(locrowlens);
118857b952d6SSatish Balay   PetscFree(buf);
118957b952d6SSatish Balay   PetscFree(ibuf);
119057b952d6SSatish Balay   PetscFree(rowners);
119157b952d6SSatish Balay   PetscFree(dlens);
1192cee3aa6bSSatish Balay   PetscFree(mask);
11936d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11946d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
119557b952d6SSatish Balay   return 0;
119657b952d6SSatish Balay }
119757b952d6SSatish Balay 
119857b952d6SSatish Balay 
1199