xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d6de1c526f58b790785007579d102dd77797ed16)
179bdfe76SSatish Balay #ifndef lint
2*d6de1c52SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.4 1996/06/21 23:12:01 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
1357b952d6SSatish Balay 
1457b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
1557b952d6SSatish Balay number to the local number in the off-diagonal part of the local
1657b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
1757b952d6SSatish Balay length of colmap equals the global matrix length.
1857b952d6SSatish Balay */
1957b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2057b952d6SSatish Balay {
2157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2257b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2357b952d6SSatish Balay   int        nbs = B->nbs,i;
2457b952d6SSatish Balay 
2557b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
2657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
2757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
2857b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
2957b952d6SSatish Balay   return 0;
3057b952d6SSatish Balay }
3157b952d6SSatish Balay 
3257b952d6SSatish Balay 
3357b952d6SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   int         ierr;
3757b952d6SSatish Balay   if (baij->size == 1) {
3857b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
3957b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
4057b952d6SSatish Balay   return 0;
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4357b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4457b952d6SSatish Balay {
4557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4657b952d6SSatish Balay   Scalar      value;
4757b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
4857b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
4957b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
5057b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
5157b952d6SSatish Balay 
5257b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
5357b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
5457b952d6SSatish Balay   }
5557b952d6SSatish Balay   baij->insertmode = addv;
5657b952d6SSatish Balay   rstart_orig = rstart*bs;
5757b952d6SSatish Balay   rend_orig   = rend*bs;
5857b952d6SSatish Balay   cstart_orig = cstart*bs;
5957b952d6SSatish Balay   cend_orig   = cend*bs;
6057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
6157b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
6257b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
6357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
6457b952d6SSatish Balay       row = im[i] - rstart_orig;
6557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
6657b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
6757b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
6857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
6957b952d6SSatish Balay           col = in[j] - cstart_orig;
7057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
7157b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
7257b952d6SSatish Balay         }
7357b952d6SSatish Balay         else {
7457b952d6SSatish Balay           if (mat->was_assembled) {
7557b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
7657b952d6SSatish Balay             col = baij->colmap[in[j]];
7757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
7857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
7957b952d6SSatish Balay               col =  in[j];
8057b952d6SSatish Balay             }
8157b952d6SSatish Balay           }
8257b952d6SSatish Balay           else col = in[j];
8357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
8457b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
8557b952d6SSatish Balay         }
8657b952d6SSatish Balay       }
8757b952d6SSatish Balay     }
8857b952d6SSatish Balay     else {
8957b952d6SSatish Balay       if (roworiented) {
9057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
9157b952d6SSatish Balay       }
9257b952d6SSatish Balay       else {
9357b952d6SSatish Balay         row = im[i];
9457b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
9557b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
9657b952d6SSatish Balay         }
9757b952d6SSatish Balay       }
9857b952d6SSatish Balay     }
9957b952d6SSatish Balay   }
10057b952d6SSatish Balay   return 0;
10157b952d6SSatish Balay }
10257b952d6SSatish Balay 
103*d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
104*d6de1c52SSatish Balay {
105*d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
106*d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
107*d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
108*d6de1c52SSatish Balay 
109*d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
110*d6de1c52SSatish Balay     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
111*d6de1c52SSatish Balay     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
112*d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
113*d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
114*d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
115*d6de1c52SSatish Balay         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
116*d6de1c52SSatish Balay         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
117*d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
118*d6de1c52SSatish Balay           col = idxn[j] - bscstart;
119*d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
120*d6de1c52SSatish Balay         }
121*d6de1c52SSatish Balay         else {
122*d6de1c52SSatish Balay           col = baij->colmap[idxn[j]/bs]*bs + col%bs;
123*d6de1c52SSatish Balay           ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
124*d6de1c52SSatish Balay         }
125*d6de1c52SSatish Balay       }
126*d6de1c52SSatish Balay     }
127*d6de1c52SSatish Balay     else {
128*d6de1c52SSatish Balay       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
129*d6de1c52SSatish Balay     }
130*d6de1c52SSatish Balay   }
131*d6de1c52SSatish Balay   return 0;
132*d6de1c52SSatish Balay }
133*d6de1c52SSatish Balay 
134*d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
135*d6de1c52SSatish Balay {
136*d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
137*d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
138*d6de1c52SSatish Balay   int        ierr, i, j, cstart = baij->cstart,bs2=baij->bs2;
139*d6de1c52SSatish Balay   double     sum = 0.0;
140*d6de1c52SSatish Balay   Scalar     *v;
141*d6de1c52SSatish Balay 
142*d6de1c52SSatish Balay   if (baij->size == 1) {
143*d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
144*d6de1c52SSatish Balay   } else {
145*d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
146*d6de1c52SSatish Balay       v = amat->a;
147*d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
148*d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
149*d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
150*d6de1c52SSatish Balay #else
151*d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
152*d6de1c52SSatish Balay #endif
153*d6de1c52SSatish Balay       }
154*d6de1c52SSatish Balay       v = bmat->a;
155*d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
156*d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
157*d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
158*d6de1c52SSatish Balay #else
159*d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
160*d6de1c52SSatish Balay #endif
161*d6de1c52SSatish Balay       }
162*d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
163*d6de1c52SSatish Balay       *norm = sqrt(*norm);
164*d6de1c52SSatish Balay     }
165*d6de1c52SSatish Balay     else if (type == NORM_1) { /* max column norm */
166*d6de1c52SSatish Balay       double *tmp, *tmp2;
167*d6de1c52SSatish Balay       int    *jj, *garray = baij->garray;
168*d6de1c52SSatish Balay       tmp  = (double *) PetscMalloc( baij->N*sizeof(double) ); CHKPTRQ(tmp);
169*d6de1c52SSatish Balay       tmp2 = (double *) PetscMalloc( baij->N*sizeof(double) ); CHKPTRQ(tmp2);
170*d6de1c52SSatish Balay       PetscMemzero(tmp,baij->N*sizeof(double));
171*d6de1c52SSatish Balay       *norm = 0.0;
172*d6de1c52SSatish Balay       v = amat->a; jj = amat->j;
173*d6de1c52SSatish Balay       for ( j=0; j<amat->nz; j++ ) {
174*d6de1c52SSatish Balay         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
175*d6de1c52SSatish Balay       }
176*d6de1c52SSatish Balay       v = bmat->a; jj = bmat->j;
177*d6de1c52SSatish Balay       for ( j=0; j<bmat->nz; j++ ) {
178*d6de1c52SSatish Balay         tmp[garray[*jj++ ]] += PetscAbsScalar(*v); v++;
179*d6de1c52SSatish Balay       }
180*d6de1c52SSatish Balay       MPI_Allreduce(tmp,tmp2,baij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
181*d6de1c52SSatish Balay       for ( j=0; j<baij->N; j++ ) {
182*d6de1c52SSatish Balay         if (tmp2[j] > *norm) *norm = tmp2[j];
183*d6de1c52SSatish Balay       }
184*d6de1c52SSatish Balay       PetscFree(tmp); PetscFree(tmp2);
185*d6de1c52SSatish Balay     }
186*d6de1c52SSatish Balay     else if (type == NORM_INFINITY) { /* max row norm */
187*d6de1c52SSatish Balay       double ntemp = 0.0;
188*d6de1c52SSatish Balay       for ( j=0; j<amat->m; j++ ) {
189*d6de1c52SSatish Balay         v = amat->a + amat->i[j];
190*d6de1c52SSatish Balay         sum = 0.0;
191*d6de1c52SSatish Balay         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
192*d6de1c52SSatish Balay           sum += PetscAbsScalar(*v); v++;
193*d6de1c52SSatish Balay         }
194*d6de1c52SSatish Balay         v = bmat->a + bmat->i[j];
195*d6de1c52SSatish Balay         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
196*d6de1c52SSatish Balay           sum += PetscAbsScalar(*v); v++;
197*d6de1c52SSatish Balay         }
198*d6de1c52SSatish Balay         if (sum > ntemp) ntemp = sum;
199*d6de1c52SSatish Balay       }
200*d6de1c52SSatish Balay       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
201*d6de1c52SSatish Balay     }
202*d6de1c52SSatish Balay     else {
203*d6de1c52SSatish Balay       SETERRQ(1,"MatNorm_MPIBAIJ:No support for two norm");
204*d6de1c52SSatish Balay     }
205*d6de1c52SSatish Balay   }
206*d6de1c52SSatish Balay   return 0;
207*d6de1c52SSatish Balay }
20857b952d6SSatish Balay 
20957b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
21057b952d6SSatish Balay {
21157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
21257b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
21357b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
21457b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
21557b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
21657b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
21757b952d6SSatish Balay   InsertMode  addv;
21857b952d6SSatish Balay   Scalar      *rvalues,*svalues;
21957b952d6SSatish Balay 
22057b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
22157b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
22257b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
22357b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
22457b952d6SSatish Balay   }
22557b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
22657b952d6SSatish Balay 
22757b952d6SSatish Balay   /*  first count number of contributors to each processor */
22857b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
22957b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
23057b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
23157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
23257b952d6SSatish Balay     idx = baij->stash.idx[i];
23357b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
23457b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
23557b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
23657b952d6SSatish Balay       }
23757b952d6SSatish Balay     }
23857b952d6SSatish Balay   }
23957b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
24057b952d6SSatish Balay 
24157b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
24257b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
24357b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
24457b952d6SSatish Balay   nreceives = work[rank];
24557b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
24657b952d6SSatish Balay   nmax = work[rank];
24757b952d6SSatish Balay   PetscFree(work);
24857b952d6SSatish Balay 
24957b952d6SSatish Balay   /* post receives:
25057b952d6SSatish Balay        1) each message will consist of ordered pairs
25157b952d6SSatish Balay      (global index,value) we store the global index as a double
25257b952d6SSatish Balay      to simplify the message passing.
25357b952d6SSatish Balay        2) since we don't know how long each individual message is we
25457b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
25557b952d6SSatish Balay      this is a lot of wasted space.
25657b952d6SSatish Balay 
25757b952d6SSatish Balay 
25857b952d6SSatish Balay        This could be done better.
25957b952d6SSatish Balay   */
26057b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
26157b952d6SSatish Balay   CHKPTRQ(rvalues);
26257b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
26357b952d6SSatish Balay   CHKPTRQ(recv_waits);
26457b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
26557b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
26657b952d6SSatish Balay               comm,recv_waits+i);
26757b952d6SSatish Balay   }
26857b952d6SSatish Balay 
26957b952d6SSatish Balay   /* do sends:
27057b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
27157b952d6SSatish Balay          the ith processor
27257b952d6SSatish Balay   */
27357b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
27457b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
27557b952d6SSatish Balay   CHKPTRQ(send_waits);
27657b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
27757b952d6SSatish Balay   starts[0] = 0;
27857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
27957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
28057b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
28157b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
28257b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
28357b952d6SSatish Balay   }
28457b952d6SSatish Balay   PetscFree(owner);
28557b952d6SSatish Balay   starts[0] = 0;
28657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
28757b952d6SSatish Balay   count = 0;
28857b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
28957b952d6SSatish Balay     if (procs[i]) {
29057b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
29157b952d6SSatish Balay                 comm,send_waits+count++);
29257b952d6SSatish Balay     }
29357b952d6SSatish Balay   }
29457b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
29557b952d6SSatish Balay 
29657b952d6SSatish Balay   /* Free cache space */
29757b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
29857b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
29957b952d6SSatish Balay 
30057b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
30157b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
30257b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
30357b952d6SSatish Balay   baij->rmax       = nmax;
30457b952d6SSatish Balay 
30557b952d6SSatish Balay   return 0;
30657b952d6SSatish Balay }
30757b952d6SSatish Balay 
30857b952d6SSatish Balay 
30957b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
31057b952d6SSatish Balay {
31157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31257b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
31357b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
31457b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
31557b952d6SSatish Balay   Scalar      *values,val;
31657b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
31757b952d6SSatish Balay 
31857b952d6SSatish Balay   /*  wait on receives */
31957b952d6SSatish Balay   while (count) {
32057b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
32157b952d6SSatish Balay     /* unpack receives into our local space */
32257b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
32357b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
32457b952d6SSatish Balay     n = n/3;
32557b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
32657b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
32757b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
32857b952d6SSatish Balay       val = values[3*i+2];
32957b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
33057b952d6SSatish Balay         col -= baij->cstart*bs;
33157b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
33257b952d6SSatish Balay       }
33357b952d6SSatish Balay       else {
33457b952d6SSatish Balay         if (mat->was_assembled) {
33557b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
33657b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
33757b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
33857b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
33957b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
34057b952d6SSatish Balay           }
34157b952d6SSatish Balay         }
34257b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
34357b952d6SSatish Balay       }
34457b952d6SSatish Balay     }
34557b952d6SSatish Balay     count--;
34657b952d6SSatish Balay   }
34757b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
34857b952d6SSatish Balay 
34957b952d6SSatish Balay   /* wait on sends */
35057b952d6SSatish Balay   if (baij->nsends) {
35157b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
35257b952d6SSatish Balay     CHKPTRQ(send_status);
35357b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
35457b952d6SSatish Balay     PetscFree(send_status);
35557b952d6SSatish Balay   }
35657b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
35757b952d6SSatish Balay 
35857b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
35957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
36057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
36157b952d6SSatish Balay 
36257b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
36357b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
36457b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
36557b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
36657b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
36757b952d6SSatish Balay   }
36857b952d6SSatish Balay 
36957b952d6SSatish Balay   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
37057b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
37157b952d6SSatish Balay   }
37257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
37357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
37457b952d6SSatish Balay 
37557b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
37657b952d6SSatish Balay   return 0;
37757b952d6SSatish Balay }
37857b952d6SSatish Balay 
37957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
38057b952d6SSatish Balay {
38157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
38257b952d6SSatish Balay   int          ierr;
38357b952d6SSatish Balay 
38457b952d6SSatish Balay   if (baij->size == 1) {
38557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
38657b952d6SSatish Balay   }
38757b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
38857b952d6SSatish Balay   return 0;
38957b952d6SSatish Balay }
39057b952d6SSatish Balay 
39157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
39257b952d6SSatish Balay {
39357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
394cee3aa6bSSatish Balay   int          ierr, format,rank,bs=baij->bs;
39557b952d6SSatish Balay   FILE         *fd;
39657b952d6SSatish Balay   ViewerType   vtype;
39757b952d6SSatish Balay 
39857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
39957b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
40057b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
40157b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
40257b952d6SSatish Balay       int nz, nzalloc, mem;
40357b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
40457b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
40557b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
40657b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
40757b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
40857b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
40957b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
41057b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
41157b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
41257b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
41357b952d6SSatish Balay       fflush(fd);
41457b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
41557b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
41657b952d6SSatish Balay       return 0;
41757b952d6SSatish Balay     }
41857b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
41957b952d6SSatish Balay       return 0;
42057b952d6SSatish Balay     }
42157b952d6SSatish Balay   }
42257b952d6SSatish Balay 
42357b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
42457b952d6SSatish Balay     Draw       draw;
42557b952d6SSatish Balay     PetscTruth isnull;
42657b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
42757b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
42857b952d6SSatish Balay   }
42957b952d6SSatish Balay 
43057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
43157b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
43257b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
43357b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
43457b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
43557b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
43657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
43757b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
43857b952d6SSatish Balay     fflush(fd);
43957b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
44057b952d6SSatish Balay   }
44157b952d6SSatish Balay   else {
44257b952d6SSatish Balay     int size = baij->size;
44357b952d6SSatish Balay     rank = baij->rank;
44457b952d6SSatish Balay     if (size == 1) {
44557b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
44657b952d6SSatish Balay     }
44757b952d6SSatish Balay     else {
44857b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
44957b952d6SSatish Balay       Mat         A;
45057b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
45157b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
45257b952d6SSatish Balay       int         mbs=baij->mbs;
45357b952d6SSatish Balay       Scalar      *a;
45457b952d6SSatish Balay 
45557b952d6SSatish Balay       if (!rank) {
456cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
45757b952d6SSatish Balay         CHKERRQ(ierr);
45857b952d6SSatish Balay       }
45957b952d6SSatish Balay       else {
460cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
46157b952d6SSatish Balay         CHKERRQ(ierr);
46257b952d6SSatish Balay       }
46357b952d6SSatish Balay       PLogObjectParent(mat,A);
46457b952d6SSatish Balay 
46557b952d6SSatish Balay       /* copy over the A part */
46657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
46757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
46857b952d6SSatish Balay       row = baij->rstart;
46957b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
47057b952d6SSatish Balay 
47157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
47257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
47357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
47457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
47557b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
47657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
477cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
478cee3aa6bSSatish Balay             col++; a += bs;
47957b952d6SSatish Balay           }
48057b952d6SSatish Balay         }
48157b952d6SSatish Balay       }
48257b952d6SSatish Balay       /* copy over the B part */
48357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
48457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
48557b952d6SSatish Balay       row = baij->rstart*bs;
48657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
48757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
48857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
48957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
49057b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
49157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
492cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
493cee3aa6bSSatish Balay             col++; a += bs;
49457b952d6SSatish Balay           }
49557b952d6SSatish Balay         }
49657b952d6SSatish Balay       }
49757b952d6SSatish Balay       PetscFree(rvals);
49857b952d6SSatish Balay       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
49957b952d6SSatish Balay       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
50057b952d6SSatish Balay       if (!rank) {
50157b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
50257b952d6SSatish Balay       }
50357b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
50457b952d6SSatish Balay     }
50557b952d6SSatish Balay   }
50657b952d6SSatish Balay   return 0;
50757b952d6SSatish Balay }
50857b952d6SSatish Balay 
50957b952d6SSatish Balay 
51057b952d6SSatish Balay 
51157b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
51257b952d6SSatish Balay {
51357b952d6SSatish Balay   Mat         mat = (Mat) obj;
51457b952d6SSatish Balay   int         ierr;
51557b952d6SSatish Balay   ViewerType  vtype;
51657b952d6SSatish Balay 
51757b952d6SSatish Balay   if (!viewer) {
51857b952d6SSatish Balay     viewer = STDOUT_VIEWER_SELF;
51957b952d6SSatish Balay   }
52057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
52157b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
52257b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
52357b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
52457b952d6SSatish Balay   }
52557b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
52657b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
52757b952d6SSatish Balay   }
52857b952d6SSatish Balay   return 0;
52957b952d6SSatish Balay }
53057b952d6SSatish Balay 
53179bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
53279bdfe76SSatish Balay {
53379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
53479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
53579bdfe76SSatish Balay   int         ierr;
53679bdfe76SSatish Balay 
53779bdfe76SSatish Balay #if defined(PETSC_LOG)
53879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
53979bdfe76SSatish Balay #endif
54079bdfe76SSatish Balay 
54179bdfe76SSatish Balay   PetscFree(baij->rowners);
54279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
54379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
54479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
54579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
54679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
54779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
54879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
54979bdfe76SSatish Balay   PetscFree(baij);
55079bdfe76SSatish Balay   PLogObjectDestroy(mat);
55179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
55279bdfe76SSatish Balay   return 0;
55379bdfe76SSatish Balay }
55479bdfe76SSatish Balay 
555cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
556cee3aa6bSSatish Balay {
557cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
558cee3aa6bSSatish Balay   int        ierr;
559cee3aa6bSSatish Balay 
560cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
561cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
562cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
563cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
564cee3aa6bSSatish Balay   return 0;
565cee3aa6bSSatish Balay }
566cee3aa6bSSatish Balay 
567cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
568cee3aa6bSSatish Balay {
569cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
570cee3aa6bSSatish Balay   int        ierr;
571cee3aa6bSSatish Balay   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
572cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
573cee3aa6bSSatish Balay   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
574cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
575cee3aa6bSSatish Balay   return 0;
576cee3aa6bSSatish Balay }
577cee3aa6bSSatish Balay 
578cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
579cee3aa6bSSatish Balay {
580cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
581cee3aa6bSSatish Balay   int        ierr;
582cee3aa6bSSatish Balay 
583cee3aa6bSSatish Balay   /* do nondiagonal part */
584cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
585cee3aa6bSSatish Balay   /* send it on its way */
586cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
587cee3aa6bSSatish Balay                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
588cee3aa6bSSatish Balay   /* do local part */
589cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
590cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
591cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
592cee3aa6bSSatish Balay   /* but is not perhaps always true. */
593cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
594cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
595cee3aa6bSSatish Balay   return 0;
596cee3aa6bSSatish Balay }
597cee3aa6bSSatish Balay 
598cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
599cee3aa6bSSatish Balay {
600cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
601cee3aa6bSSatish Balay   int        ierr;
602cee3aa6bSSatish Balay 
603cee3aa6bSSatish Balay   /* do nondiagonal part */
604cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
605cee3aa6bSSatish Balay   /* send it on its way */
606cee3aa6bSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
607cee3aa6bSSatish Balay                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
608cee3aa6bSSatish Balay   /* do local part */
609cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
610cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
611cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
612cee3aa6bSSatish Balay   /* but is not perhaps always true. */
613cee3aa6bSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
614cee3aa6bSSatish Balay                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
615cee3aa6bSSatish Balay   return 0;
616cee3aa6bSSatish Balay }
617cee3aa6bSSatish Balay 
618cee3aa6bSSatish Balay /*
619cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
620cee3aa6bSSatish Balay    diagonal block
621cee3aa6bSSatish Balay */
622cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
623cee3aa6bSSatish Balay {
624cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
625cee3aa6bSSatish Balay   if (a->M != a->N)
626cee3aa6bSSatish Balay     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
627cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
628cee3aa6bSSatish Balay }
629cee3aa6bSSatish Balay 
630cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
631cee3aa6bSSatish Balay {
632cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
633cee3aa6bSSatish Balay   int        ierr;
634cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
635cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
636cee3aa6bSSatish Balay   return 0;
637cee3aa6bSSatish Balay }
63857b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
63957b952d6SSatish Balay {
64057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64157b952d6SSatish Balay   *m = mat->M; *n = mat->N;
64257b952d6SSatish Balay   return 0;
64357b952d6SSatish Balay }
64457b952d6SSatish Balay 
64557b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
64657b952d6SSatish Balay {
64757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
64857b952d6SSatish Balay   *m = mat->m; *n = mat->N;
64957b952d6SSatish Balay   return 0;
65057b952d6SSatish Balay }
65157b952d6SSatish Balay 
65257b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
65357b952d6SSatish Balay {
65457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
65557b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
65657b952d6SSatish Balay   return 0;
65757b952d6SSatish Balay }
65857b952d6SSatish Balay 
65979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
66079bdfe76SSatish Balay static struct _MatOps MatOps = {
661cee3aa6bSSatish Balay   MatSetValues_MPIBAIJ,0,0,MatMult_MPIBAIJ,
662cee3aa6bSSatish Balay   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
66379bdfe76SSatish Balay   0,0,0,0,
66479bdfe76SSatish Balay   0,0,0,0,
665cee3aa6bSSatish Balay   0,MatGetDiagonal_MPIBAIJ,0,0,
66657b952d6SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
66757b952d6SSatish Balay   0,0,MatGetReordering_MPIBAIJ,0,
66857b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
66957b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
67079bdfe76SSatish Balay   0,0,0,0,
67179bdfe76SSatish Balay   0,0,0,0,
67279bdfe76SSatish Balay   0,0,0,0,
67379bdfe76SSatish Balay   0,0,0,0,
674cee3aa6bSSatish Balay   MatScale_MPIBAIJ,0,0};
67579bdfe76SSatish Balay 
67679bdfe76SSatish Balay 
67779bdfe76SSatish Balay /*@C
67879bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
67979bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
68079bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
68179bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
68279bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
68379bdfe76SSatish Balay 
68479bdfe76SSatish Balay    Input Parameters:
68579bdfe76SSatish Balay .  comm - MPI communicator
68679bdfe76SSatish Balay .  bs   - size of blockk
68779bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
68879bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
68979bdfe76SSatish Balay            if N is given)
69079bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
69179bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
69279bdfe76SSatish Balay            if n is given)
69379bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
69479bdfe76SSatish Balay            submatrix  (same for all local rows)
69579bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
69679bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
69779bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
69879bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
69979bdfe76SSatish Balay            submatrix (same for all local rows).
70079bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
70179bdfe76SSatish Balay            submatrix or null (possibly different for each row).
70279bdfe76SSatish Balay 
70379bdfe76SSatish Balay    Output Parameter:
70479bdfe76SSatish Balay .  A - the matrix
70579bdfe76SSatish Balay 
70679bdfe76SSatish Balay    Notes:
70779bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
70879bdfe76SSatish Balay    (possibly both).
70979bdfe76SSatish Balay 
71079bdfe76SSatish Balay    Storage Information:
71179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
71279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
71379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
71479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
71579bdfe76SSatish Balay 
71679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
71779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
71879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
71979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
72079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
72179bdfe76SSatish Balay 
72279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
72379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
72479bdfe76SSatish Balay 
72579bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
72679bdfe76SSatish Balay $         -------------------
72779bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
72879bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
72979bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
73079bdfe76SSatish Balay $         -------------------
73179bdfe76SSatish Balay $
73279bdfe76SSatish Balay 
73379bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
73479bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
73579bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
73657b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
73779bdfe76SSatish Balay 
73879bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
73979bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
74079bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
74179bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
74279bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
74379bdfe76SSatish Balay 
74479bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
74579bdfe76SSatish Balay 
74679bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
74779bdfe76SSatish Balay @*/
74879bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
74979bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
75079bdfe76SSatish Balay {
75179bdfe76SSatish Balay   Mat          B;
75279bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
753cee3aa6bSSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
75479bdfe76SSatish Balay 
75579bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
75679bdfe76SSatish Balay   *A = 0;
75779bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
75879bdfe76SSatish Balay   PLogObjectCreate(B);
75979bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
76079bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
76179bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
76279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
76379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
76457b952d6SSatish Balay 
76579bdfe76SSatish Balay   B->factor     = 0;
76679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
76779bdfe76SSatish Balay 
76879bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
76979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
77079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
77179bdfe76SSatish Balay 
77279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
77357b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
774cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
775cee3aa6bSSatish Balay   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
776cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
777cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
77879bdfe76SSatish Balay 
77979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
78079bdfe76SSatish Balay     work[0] = m; work[1] = n;
78179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
78279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
78379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
78479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
78579bdfe76SSatish Balay   }
78679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
78779bdfe76SSatish Balay     Mbs = M/bs;
78879bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
78979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
79079bdfe76SSatish Balay     m   = mbs*bs;
79179bdfe76SSatish Balay   }
79279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
79379bdfe76SSatish Balay     Nbs = N/bs;
79479bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
79579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
79679bdfe76SSatish Balay     n   = nbs*bs;
79779bdfe76SSatish Balay   }
798cee3aa6bSSatish Balay   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
79979bdfe76SSatish Balay 
80079bdfe76SSatish Balay   b->m = m; B->m = m;
80179bdfe76SSatish Balay   b->n = n; B->n = n;
80279bdfe76SSatish Balay   b->N = N; B->N = N;
80379bdfe76SSatish Balay   b->M = M; B->M = M;
80479bdfe76SSatish Balay   b->bs  = bs;
80579bdfe76SSatish Balay   b->bs2 = bs*bs;
80679bdfe76SSatish Balay   b->mbs = mbs;
80779bdfe76SSatish Balay   b->nbs = nbs;
80879bdfe76SSatish Balay   b->Mbs = Mbs;
80979bdfe76SSatish Balay   b->Nbs = Nbs;
81079bdfe76SSatish Balay 
81179bdfe76SSatish Balay   /* build local table of row and column ownerships */
81279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
81379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
81479bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
81579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
81679bdfe76SSatish Balay   b->rowners[0] = 0;
81779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
81879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
81979bdfe76SSatish Balay   }
82079bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
82179bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
82279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
82379bdfe76SSatish Balay   b->cowners[0] = 0;
82479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
82579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
82679bdfe76SSatish Balay   }
82779bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
82879bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
82979bdfe76SSatish Balay 
83079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
83179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
83279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
83379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
83479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
83579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
83679bdfe76SSatish Balay 
83779bdfe76SSatish Balay   /* build cache for off array entries formed */
83879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
83979bdfe76SSatish Balay   b->colmap      = 0;
84079bdfe76SSatish Balay   b->garray      = 0;
84179bdfe76SSatish Balay   b->roworiented = 1;
84279bdfe76SSatish Balay 
84379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
84479bdfe76SSatish Balay   b->lvec      = 0;
84579bdfe76SSatish Balay   b->Mvctx     = 0;
84679bdfe76SSatish Balay 
84779bdfe76SSatish Balay   /* stuff for MatGetRow() */
84879bdfe76SSatish Balay   b->rowindices   = 0;
84979bdfe76SSatish Balay   b->rowvalues    = 0;
85079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
85179bdfe76SSatish Balay 
85279bdfe76SSatish Balay   *A = B;
85379bdfe76SSatish Balay   return 0;
85479bdfe76SSatish Balay }
85557b952d6SSatish Balay 
85657b952d6SSatish Balay #include "sys.h"
85757b952d6SSatish Balay 
85857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
85957b952d6SSatish Balay {
86057b952d6SSatish Balay   Mat          A;
86157b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
86257b952d6SSatish Balay   Scalar       *vals,*buf;
86357b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
86457b952d6SSatish Balay   MPI_Status   status;
865cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
86657b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
86757b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
86857b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
86957b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
87057b952d6SSatish Balay 
87157b952d6SSatish Balay 
87257b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
87357b952d6SSatish Balay   bs2  = bs*bs;
87457b952d6SSatish Balay 
87557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
87657b952d6SSatish Balay   if (!rank) {
87757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
87857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
879cee3aa6bSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
88057b952d6SSatish Balay   }
88157b952d6SSatish Balay 
88257b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
88357b952d6SSatish Balay   M = header[1]; N = header[2];
88457b952d6SSatish Balay 
88557b952d6SSatish Balay 
88657b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
88757b952d6SSatish Balay 
88857b952d6SSatish Balay   /*
88957b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
89057b952d6SSatish Balay      divisible by the blocksize
89157b952d6SSatish Balay   */
89257b952d6SSatish Balay   Mbs        = M/bs;
89357b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
89457b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
89557b952d6SSatish Balay   else                  Mbs++;
89657b952d6SSatish Balay   if (extra_rows &&!rank) {
89757b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
89857b952d6SSatish Balay   }
89957b952d6SSatish Balay   /* determine ownership of all rows */
90057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
90157b952d6SSatish Balay   m   = mbs * bs;
902cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
903cee3aa6bSSatish Balay   browners = rowners + size + 1;
90457b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
90557b952d6SSatish Balay   rowners[0] = 0;
906cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
907cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
90857b952d6SSatish Balay   rstart = rowners[rank];
90957b952d6SSatish Balay   rend   = rowners[rank+1];
91057b952d6SSatish Balay 
91157b952d6SSatish Balay   /* distribute row lengths to all processors */
91257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
91357b952d6SSatish Balay   if (!rank) {
91457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
91557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
91657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
91757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
918cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
919cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
92057b952d6SSatish Balay     PetscFree(sndcounts);
92157b952d6SSatish Balay   }
92257b952d6SSatish Balay   else {
92357b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
92457b952d6SSatish Balay   }
92557b952d6SSatish Balay 
92657b952d6SSatish Balay   if (!rank) {
92757b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
92857b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
92957b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
93057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
93157b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
93257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
93357b952d6SSatish Balay       }
93457b952d6SSatish Balay     }
93557b952d6SSatish Balay     PetscFree(rowlengths);
93657b952d6SSatish Balay 
93757b952d6SSatish Balay     /* determine max buffer needed and allocate it */
93857b952d6SSatish Balay     maxnz = 0;
93957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
94057b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
94157b952d6SSatish Balay     }
94257b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
94357b952d6SSatish Balay 
94457b952d6SSatish Balay     /* read in my part of the matrix column indices  */
94557b952d6SSatish Balay     nz = procsnz[0];
94657b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
94757b952d6SSatish Balay     mycols = ibuf;
948cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
94957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
950cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
951cee3aa6bSSatish Balay 
95257b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
95357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
95457b952d6SSatish Balay       nz = procsnz[i];
95557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
95657b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
95757b952d6SSatish Balay     }
95857b952d6SSatish Balay     /* read in the stuff for the last proc */
95957b952d6SSatish Balay     if ( size != 1 ) {
96057b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
96157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
96257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
963cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
96457b952d6SSatish Balay     }
96557b952d6SSatish Balay     PetscFree(cols);
96657b952d6SSatish Balay   }
96757b952d6SSatish Balay   else {
96857b952d6SSatish Balay     /* determine buffer space needed for message */
96957b952d6SSatish Balay     nz = 0;
97057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
97157b952d6SSatish Balay       nz += locrowlens[i];
97257b952d6SSatish Balay     }
97357b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
97457b952d6SSatish Balay     mycols = ibuf;
97557b952d6SSatish Balay     /* receive message of column indices*/
97657b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
97757b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
978cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
97957b952d6SSatish Balay   }
98057b952d6SSatish Balay 
98157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
982cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
983cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
98457b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
985cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
98657b952d6SSatish Balay   masked1 = mask    + Mbs;
98757b952d6SSatish Balay   masked2 = masked1 + Mbs;
98857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
98957b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
99057b952d6SSatish Balay     dcount  = 0;
99157b952d6SSatish Balay     odcount = 0;
99257b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
99357b952d6SSatish Balay       kmax = locrowlens[rowcount];
99457b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
99557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
99657b952d6SSatish Balay         if (!mask[tmp]) {
99757b952d6SSatish Balay           mask[tmp] = 1;
99857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
99957b952d6SSatish Balay           else masked1[dcount++] = tmp;
100057b952d6SSatish Balay         }
100157b952d6SSatish Balay       }
100257b952d6SSatish Balay       rowcount++;
100357b952d6SSatish Balay     }
1004cee3aa6bSSatish Balay 
100557b952d6SSatish Balay     dlens[i]  = dcount;
100657b952d6SSatish Balay     odlens[i] = odcount;
1007cee3aa6bSSatish Balay 
100857b952d6SSatish Balay     /* zero out the mask elements we set */
100957b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
101057b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
101157b952d6SSatish Balay   }
1012cee3aa6bSSatish Balay 
101357b952d6SSatish Balay   /* create our matrix */
101457b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
101557b952d6SSatish Balay   A = *newmat;
101657b952d6SSatish Balay   MatSetOption(A,COLUMNS_SORTED);
101757b952d6SSatish Balay 
101857b952d6SSatish Balay   if (!rank) {
101957b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
102057b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
102157b952d6SSatish Balay     nz = procsnz[0];
102257b952d6SSatish Balay     vals = buf;
1023cee3aa6bSSatish Balay     mycols = ibuf;
1024cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
102557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1026cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
102757b952d6SSatish Balay     /* insert into matrix */
102857b952d6SSatish Balay     jj      = rstart*bs;
102957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
103057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
103157b952d6SSatish Balay       mycols += locrowlens[i];
103257b952d6SSatish Balay       vals   += locrowlens[i];
103357b952d6SSatish Balay       jj++;
103457b952d6SSatish Balay     }
103557b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
103657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
103757b952d6SSatish Balay       nz = procsnz[i];
103857b952d6SSatish Balay       vals = buf;
103957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
104057b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
104157b952d6SSatish Balay     }
104257b952d6SSatish Balay     /* the last proc */
104357b952d6SSatish Balay     if ( size != 1 ){
104457b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1045cee3aa6bSSatish Balay       vals = buf;
104657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
104757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1048cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
104957b952d6SSatish Balay     }
105057b952d6SSatish Balay     PetscFree(procsnz);
105157b952d6SSatish Balay   }
105257b952d6SSatish Balay   else {
105357b952d6SSatish Balay     /* receive numeric values */
105457b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
105557b952d6SSatish Balay 
105657b952d6SSatish Balay     /* receive message of values*/
105757b952d6SSatish Balay     vals = buf;
1058cee3aa6bSSatish Balay     mycols = ibuf;
105957b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
106057b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1061cee3aa6bSSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
106257b952d6SSatish Balay 
106357b952d6SSatish Balay     /* insert into matrix */
106457b952d6SSatish Balay     jj      = rstart*bs;
1065cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
106657b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
106757b952d6SSatish Balay       mycols += locrowlens[i];
106857b952d6SSatish Balay       vals   += locrowlens[i];
106957b952d6SSatish Balay       jj++;
107057b952d6SSatish Balay     }
107157b952d6SSatish Balay   }
107257b952d6SSatish Balay   PetscFree(locrowlens);
107357b952d6SSatish Balay   PetscFree(buf);
107457b952d6SSatish Balay   PetscFree(ibuf);
107557b952d6SSatish Balay   PetscFree(rowners);
107657b952d6SSatish Balay   PetscFree(dlens);
1077cee3aa6bSSatish Balay   PetscFree(mask);
107857b952d6SSatish Balay   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
107957b952d6SSatish Balay   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
108057b952d6SSatish Balay   return 0;
108157b952d6SSatish Balay }
108257b952d6SSatish Balay 
108357b952d6SSatish Balay 
1084