xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 47b4a8ea77628bb8b5f28a18a3fcb28317589dba)
1 #ifndef lint
2 static char vcid[] = "$Id: mpibaij.c,v 1.10 1996/07/08 01:18:17 curfman Exp curfman $";
3 #endif
4 
5 #include "mpibaij.h"
6 
7 
8 #include "draw.h"
9 #include "pinclude/pviewer.h"
10 
11 extern int MatSetUpMultiply_MPIBAIJ(Mat);
12 extern int DisAssemble_MPIBAIJ(Mat);
13 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non scable way since the
19 length of colmap equals the global matrix length.
20 */
21 static int CreateColmap_MPIBAIJ_Private(Mat mat)
22 {
23   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
24   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
25   int        nbs = B->nbs,i;
26 
27   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
28   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
29   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
30   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
31   return 0;
32 }
33 
34 
35 static int MatGetReordering_MPIBAIJ(Mat mat,MatReordering type,IS *rperm,IS *cperm)
36 {
37   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
38   int         ierr;
39   if (baij->size == 1) {
40     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
41   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
42   return 0;
43 }
44 
45 static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
46 {
47   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
48   Scalar      value;
49   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
50   int         cstart = baij->cstart, cend = baij->cend,row,col;
51   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
52   int         cstart_orig,cend_orig,bs=baij->bs;
53 
54   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
55     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
56   }
57   baij->insertmode = addv;
58   rstart_orig = rstart*bs;
59   rend_orig   = rend*bs;
60   cstart_orig = cstart*bs;
61   cend_orig   = cend*bs;
62   for ( i=0; i<m; i++ ) {
63     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
64     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
65     if (im[i] >= rstart_orig && im[i] < rend_orig) {
66       row = im[i] - rstart_orig;
67       for ( j=0; j<n; j++ ) {
68         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
69         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
70         if (in[j] >= cstart_orig && in[j] < cend_orig){
71           col = in[j] - cstart_orig;
72           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
73           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
74         }
75         else {
76           if (mat->was_assembled) {
77             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
78             col = baij->colmap[in[j]];
79             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
80               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
81               col =  in[j];
82             }
83           }
84           else col = in[j];
85           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
86           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
87         }
88       }
89     }
90     else {
91       if (roworiented) {
92         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
93       }
94       else {
95         row = im[i];
96         for ( j=0; j<n; j++ ) {
97           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
98         }
99       }
100     }
101   }
102   return 0;
103 }
104 
105 static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
106 {
107   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
108   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
109   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
110 
111   for ( i=0; i<m; i++ ) {
112     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative row");
113     if (idxm[i] >= baij->M) SETERRQ(1,"MatGetValues_MPIBAIJ:Row too large");
114     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
115       row = idxm[i] - bsrstart;
116       for ( j=0; j<n; j++ ) {
117         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIBAIJ:Negative column");
118         if (idxn[j] >= baij->N) SETERRQ(1,"MatGetValues_MPIBAIJ:Col too large");
119         if (idxn[j] >= bscstart && idxn[j] < bscend){
120           col = idxn[j] - bscstart;
121           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
122         }
123         else {
124           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
125           if ( baij->garray[baij->colmap[idxn[j]/bs]] != idxn[j]/bs ) *(v+i*n+j) = 0.0;
126           else {
127             col = baij->colmap[idxn[j]/bs]*bs + idxn[j]%bs;
128             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
129           }
130         }
131       }
132     }
133     else {
134       SETERRQ(1,"MatGetValues_MPIBAIJ:Only local values currently supported");
135     }
136   }
137   return 0;
138 }
139 
140 static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
141 {
142   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
143   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
144   int        ierr, i,bs2=baij->bs2;
145   double     sum = 0.0;
146   Scalar     *v;
147 
148   if (baij->size == 1) {
149     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
150   } else {
151     if (type == NORM_FROBENIUS) {
152       v = amat->a;
153       for (i=0; i<amat->nz*bs2; i++ ) {
154 #if defined(PETSC_COMPLEX)
155         sum += real(conj(*v)*(*v)); v++;
156 #else
157         sum += (*v)*(*v); v++;
158 #endif
159       }
160       v = bmat->a;
161       for (i=0; i<bmat->nz*bs2; i++ ) {
162 #if defined(PETSC_COMPLEX)
163         sum += real(conj(*v)*(*v)); v++;
164 #else
165         sum += (*v)*(*v); v++;
166 #endif
167       }
168       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
169       *norm = sqrt(*norm);
170     }
171     else
172       SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
173   }
174   return 0;
175 }
176 
177 static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
178 {
179   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
180   MPI_Comm    comm = mat->comm;
181   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
182   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
183   MPI_Request *send_waits,*recv_waits;
184   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
185   InsertMode  addv;
186   Scalar      *rvalues,*svalues;
187 
188   /* make sure all processors are either in INSERTMODE or ADDMODE */
189   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
190   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
192   }
193   baij->insertmode = addv; /* in case this processor had no cache */
194 
195   /*  first count number of contributors to each processor */
196   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
197   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
198   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
199   for ( i=0; i<baij->stash.n; i++ ) {
200     idx = baij->stash.idx[i];
201     for ( j=0; j<size; j++ ) {
202       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
203         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
204       }
205     }
206   }
207   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
208 
209   /* inform other processors of number of messages and max length*/
210   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
211   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
212   nreceives = work[rank];
213   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
214   nmax = work[rank];
215   PetscFree(work);
216 
217   /* post receives:
218        1) each message will consist of ordered pairs
219      (global index,value) we store the global index as a double
220      to simplify the message passing.
221        2) since we don't know how long each individual message is we
222      allocate the largest needed buffer for each receive. Potentially
223      this is a lot of wasted space.
224 
225 
226        This could be done better.
227   */
228   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
229   CHKPTRQ(rvalues);
230   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
231   CHKPTRQ(recv_waits);
232   for ( i=0; i<nreceives; i++ ) {
233     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
234               comm,recv_waits+i);
235   }
236 
237   /* do sends:
238       1) starts[i] gives the starting index in svalues for stuff going to
239          the ith processor
240   */
241   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
242   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
243   CHKPTRQ(send_waits);
244   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
245   starts[0] = 0;
246   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
247   for ( i=0; i<baij->stash.n; i++ ) {
248     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
249     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
250     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
251   }
252   PetscFree(owner);
253   starts[0] = 0;
254   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
255   count = 0;
256   for ( i=0; i<size; i++ ) {
257     if (procs[i]) {
258       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
259                 comm,send_waits+count++);
260     }
261   }
262   PetscFree(starts); PetscFree(nprocs);
263 
264   /* Free cache space */
265   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
266   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
267 
268   baij->svalues    = svalues;    baij->rvalues    = rvalues;
269   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
270   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
271   baij->rmax       = nmax;
272 
273   return 0;
274 }
275 
276 
277 static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
278 {
279   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
280   MPI_Status  *send_status,recv_status;
281   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
282   int         bs=baij->bs,row,col,other_disassembled;
283   Scalar      *values,val;
284   InsertMode  addv = baij->insertmode;
285 
286   /*  wait on receives */
287   while (count) {
288     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
289     /* unpack receives into our local space */
290     values = baij->rvalues + 3*imdex*baij->rmax;
291     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
292     n = n/3;
293     for ( i=0; i<n; i++ ) {
294       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
295       col = (int) PetscReal(values[3*i+1]);
296       val = values[3*i+2];
297       if (col >= baij->cstart*bs && col < baij->cend*bs) {
298         col -= baij->cstart*bs;
299         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
300       }
301       else {
302         if (mat->was_assembled) {
303           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
304           col = baij->colmap[col/bs]*bs + col%bs;
305           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
306             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
307             col = (int) PetscReal(values[3*i+1]);
308           }
309         }
310         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
311       }
312     }
313     count--;
314   }
315   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
316 
317   /* wait on sends */
318   if (baij->nsends) {
319     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
320     CHKPTRQ(send_status);
321     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
322     PetscFree(send_status);
323   }
324   PetscFree(baij->send_waits); PetscFree(baij->svalues);
325 
326   baij->insertmode = NOT_SET_VALUES;
327   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
329 
330   /* determine if any processor has disassembled, if so we must
331      also disassemble ourselfs, in order that we may reassemble. */
332   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
333   if (mat->was_assembled && !other_disassembled) {
334     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
335   }
336 
337   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
338     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
339   }
340   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
342 
343   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
344   return 0;
345 }
346 
347 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
348 {
349   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
350   int          ierr;
351 
352   if (baij->size == 1) {
353     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
354   }
355   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
356   return 0;
357 }
358 
359 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
360 {
361   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
362   int          ierr, format,rank,bs=baij->bs;
363   FILE         *fd;
364   ViewerType   vtype;
365 
366   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
367   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
368     ierr = ViewerGetFormat(viewer,&format);
369     if (format == ASCII_FORMAT_INFO_DETAILED) {
370       int nz, nzalloc, mem;
371       MPI_Comm_rank(mat->comm,&rank);
372       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
373       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
374       PetscSequentialPhaseBegin(mat->comm,1);
375       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
376               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
377       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
378       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
379       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
380       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
381       fflush(fd);
382       PetscSequentialPhaseEnd(mat->comm,1);
383       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
384       return 0;
385     }
386     else if (format == ASCII_FORMAT_INFO) {
387       return 0;
388     }
389   }
390 
391   if (vtype == DRAW_VIEWER) {
392     Draw       draw;
393     PetscTruth isnull;
394     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
395     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
396   }
397 
398   if (vtype == ASCII_FILE_VIEWER) {
399     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
400     PetscSequentialPhaseBegin(mat->comm,1);
401     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
402            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
403             baij->cstart*bs,baij->cend*bs);
404     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
405     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
406     fflush(fd);
407     PetscSequentialPhaseEnd(mat->comm,1);
408   }
409   else {
410     int size = baij->size;
411     rank = baij->rank;
412     if (size == 1) {
413       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
414     }
415     else {
416       /* assemble the entire matrix onto first processor. */
417       Mat         A;
418       Mat_SeqBAIJ *Aloc;
419       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
420       int         mbs=baij->mbs;
421       Scalar      *a;
422 
423       if (!rank) {
424         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
425         CHKERRQ(ierr);
426       }
427       else {
428         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
429         CHKERRQ(ierr);
430       }
431       PLogObjectParent(mat,A);
432 
433       /* copy over the A part */
434       Aloc = (Mat_SeqBAIJ*) baij->A->data;
435       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
436       row = baij->rstart;
437       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
438 
439       for ( i=0; i<mbs; i++ ) {
440         rvals[0] = bs*(baij->rstart + i);
441         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
442         for ( j=ai[i]; j<ai[i+1]; j++ ) {
443           col = (baij->cstart+aj[j])*bs;
444           for (k=0; k<bs; k++ ) {
445             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
446             col++; a += bs;
447           }
448         }
449       }
450       /* copy over the B part */
451       Aloc = (Mat_SeqBAIJ*) baij->B->data;
452       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
453       row = baij->rstart*bs;
454       for ( i=0; i<mbs; i++ ) {
455         rvals[0] = bs*(baij->rstart + i);
456         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
457         for ( j=ai[i]; j<ai[i+1]; j++ ) {
458           col = baij->garray[aj[j]]*bs;
459           for (k=0; k<bs; k++ ) {
460             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
461             col++; a += bs;
462           }
463         }
464       }
465       PetscFree(rvals);
466       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
467       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
468       if (!rank) {
469         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
470       }
471       ierr = MatDestroy(A); CHKERRQ(ierr);
472     }
473   }
474   return 0;
475 }
476 
477 
478 
479 static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
480 {
481   Mat         mat = (Mat) obj;
482   int         ierr;
483   ViewerType  vtype;
484 
485   if (!viewer) {
486     viewer = STDOUT_VIEWER_SELF;
487   }
488   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
489   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
490       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
491     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
492   }
493   else if (vtype == BINARY_FILE_VIEWER) {
494     return MatView_MPIBAIJ_Binary(mat,viewer);
495   }
496   return 0;
497 }
498 
499 static int MatDestroy_MPIBAIJ(PetscObject obj)
500 {
501   Mat         mat = (Mat) obj;
502   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
503   int         ierr;
504 
505 #if defined(PETSC_LOG)
506   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
507 #endif
508 
509   PetscFree(baij->rowners);
510   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
511   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
512   if (baij->colmap) PetscFree(baij->colmap);
513   if (baij->garray) PetscFree(baij->garray);
514   if (baij->lvec)   VecDestroy(baij->lvec);
515   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
516   if (baij->rowvalues) PetscFree(baij->rowvalues);
517   PetscFree(baij);
518   PLogObjectDestroy(mat);
519   PetscHeaderDestroy(mat);
520   return 0;
521 }
522 
523 static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
524 {
525   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
526   int         ierr, nt;
527 
528   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
529   if (nt != a->n) {
530     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
531   }
532   ierr = VecGetLocalSize(yy,&nt);  CHKERRQ(ierr);
533   if (nt != a->m) {
534     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and yy");
535   }
536   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
537   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
538   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
539   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
540   return 0;
541 }
542 
543 static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
544 {
545   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
546   int        ierr;
547   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
548   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
549   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
550   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
551   return 0;
552 }
553 
554 static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
555 {
556   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
557   int        ierr;
558 
559   /* do nondiagonal part */
560   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
561   /* send it on its way */
562   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
563                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
564   /* do local part */
565   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
566   /* receive remote parts: note this assumes the values are not actually */
567   /* inserted in yy until the next line, which is true for my implementation*/
568   /* but is not perhaps always true. */
569   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
570                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
571   return 0;
572 }
573 
574 static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
575 {
576   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
577   int        ierr;
578 
579   /* do nondiagonal part */
580   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
581   /* send it on its way */
582   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
583                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
584   /* do local part */
585   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
586   /* receive remote parts: note this assumes the values are not actually */
587   /* inserted in yy until the next line, which is true for my implementation*/
588   /* but is not perhaps always true. */
589   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
590                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
591   return 0;
592 }
593 
594 /*
595   This only works correctly for square matrices where the subblock A->A is the
596    diagonal block
597 */
598 static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
599 {
600   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
601   if (a->M != a->N)
602     SETERRQ(1,"MatGetDiagonal_MPIBAIJ:Supports only square matrix where A->A is diag block");
603   return MatGetDiagonal(a->A,v);
604 }
605 
606 static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
607 {
608   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
609   int        ierr;
610   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
611   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
612   return 0;
613 }
614 static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
615 {
616   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
617   *m = mat->M; *n = mat->N;
618   return 0;
619 }
620 
621 static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
622 {
623   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
624   *m = mat->m; *n = mat->N;
625   return 0;
626 }
627 
628 static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
629 {
630   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
631   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
632   return 0;
633 }
634 
635 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
636 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
637 
638 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
639 {
640   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
641   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
642   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
643   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
644   int        *cmap, *idx_p,cstart = mat->cstart;
645 
646   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIBAIJ:Already active");
647   mat->getrowactive = PETSC_TRUE;
648 
649   if (!mat->rowvalues && (idx || v)) {
650     /*
651         allocate enough space to hold information from the longest row.
652     */
653     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
654     int     max = 1,mbs = mat->mbs,tmp;
655     for ( i=0; i<mbs; i++ ) {
656       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
657       if (max < tmp) { max = tmp; }
658     }
659     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
660     CHKPTRQ(mat->rowvalues);
661     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
662   }
663 
664 
665   if (row < brstart || row >= brend) SETERRQ(1,"MatGetRow_MPIBAIJ:Only local rows")
666   lrow = row - brstart;
667 
668   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
669   if (!v)   {pvA = 0; pvB = 0;}
670   if (!idx) {pcA = 0; if (!v) pcB = 0;}
671   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
672   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
673   nztot = nzA + nzB;
674 
675   cmap  = mat->garray;
676   if (v  || idx) {
677     if (nztot) {
678       /* Sort by increasing column numbers, assuming A and B already sorted */
679       int imark = -1;
680       if (v) {
681         *v = v_p = mat->rowvalues;
682         for ( i=0; i<nzB; i++ ) {
683           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
684           else break;
685         }
686         imark = i;
687         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
688         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
689       }
690       if (idx) {
691         *idx = idx_p = mat->rowindices;
692         if (imark > -1) {
693           for ( i=0; i<imark; i++ ) {
694             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
695           }
696         } else {
697           for ( i=0; i<nzB; i++ ) {
698             if (cmap[cworkB[i]/bs] < cstart)
699               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
700             else break;
701           }
702           imark = i;
703         }
704         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
705         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
706       }
707     }
708     else {
709       if (idx) *idx = 0;
710       if (v)   *v   = 0;
711     }
712   }
713   *nz = nztot;
714   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
715   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
716   return 0;
717 }
718 
719 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
720 {
721   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
722   if (baij->getrowactive == PETSC_FALSE) {
723     SETERRQ(1,"MatRestoreRow_MPIBAIJ:MatGetRow not called");
724   }
725   baij->getrowactive = PETSC_FALSE;
726   return 0;
727 }
728 
729 /* -------------------------------------------------------------------*/
730 static struct _MatOps MatOps = {
731   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
732   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
733   0,0,0,0,
734   0,0,0,0,
735   0,MatGetDiagonal_MPIBAIJ,0,MatNorm_MPIBAIJ,
736   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
737   0,0,MatGetReordering_MPIBAIJ,0,
738   0,0,0,MatGetSize_MPIBAIJ,
739   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
740   0,0,0,0,
741   0,0,0,0,
742   0,0,0,MatGetSubMatrices_MPIBAIJ,
743   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,0,
744   MatScale_MPIBAIJ,0,0};
745 
746 
747 /*@C
748    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
749    (block compressed row).  For good matrix assembly performance
750    the user should preallocate the matrix storage by setting the parameters
751    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
752    performance can be increased by more than a factor of 50.
753 
754    Input Parameters:
755 .  comm - MPI communicator
756 .  bs   - size of blockk
757 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
758            This value should be the same as the local size used in creating the
759            y vector for the matrix-vector product y = Ax.
760 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
761            This value should be the same as the local size used in creating the
762            x vector for the matrix-vector product y = Ax.
763 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
764 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
765 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
766            submatrix  (same for all local rows)
767 .  d_nzz - array containing the number of block nonzeros in the various block rows
768            of the in diagonal portion of the local (possibly different for each block
769            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
770            it is zero.
771 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
772            submatrix (same for all local rows).
773 .  o_nzz - array containing the number of nonzeros in the various block rows of the
774            off-diagonal portion of the local submatrix (possibly different for
775            each block row) or PETSC_NULL.
776 
777    Output Parameter:
778 .  A - the matrix
779 
780    Notes:
781    The user MUST specify either the local or global matrix dimensions
782    (possibly both).
783 
784    Storage Information:
785    For a square global matrix we define each processor's diagonal portion
786    to be its local rows and the corresponding columns (a square submatrix);
787    each processor's off-diagonal portion encompasses the remainder of the
788    local matrix (a rectangular submatrix).
789 
790    The user can specify preallocated storage for the diagonal part of
791    the local submatrix with either d_nz or d_nnz (not both).  Set
792    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
793    memory allocation.  Likewise, specify preallocated storage for the
794    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
795 
796    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
797    the figure below we depict these three local rows and all columns (0-11).
798 
799 $          0 1 2 3 4 5 6 7 8 9 10 11
800 $         -------------------
801 $  row 3  |  o o o d d d o o o o o o
802 $  row 4  |  o o o d d d o o o o o o
803 $  row 5  |  o o o d d d o o o o o o
804 $         -------------------
805 $
806 
807    Thus, any entries in the d locations are stored in the d (diagonal)
808    submatrix, and any entries in the o locations are stored in the
809    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
810    stored simply in the MATSEQBAIJ format for compressed row storage.
811 
812    Now d_nz should indicate the number of nonzeros per row in the d matrix,
813    and o_nz should indicate the number of nonzeros per row in the o matrix.
814    In general, for PDE problems in which most nonzeros are near the diagonal,
815    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
816    or you will get TERRIBLE performance; see the users' manual chapter on
817    matrices and the file $(PETSC_DIR)/Performance.
818 
819 .keywords: matrix, block, aij, compressed row, sparse, parallel
820 
821 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
822 @*/
823 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
824                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
825 {
826   Mat          B;
827   Mat_MPIBAIJ  *b;
828   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE;
829 
830   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
831   *A = 0;
832   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
833   PLogObjectCreate(B);
834   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
835   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
836   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
837   B->destroy    = MatDestroy_MPIBAIJ;
838   B->view       = MatView_MPIBAIJ;
839 
840   B->factor     = 0;
841   B->assembled  = PETSC_FALSE;
842 
843   b->insertmode = NOT_SET_VALUES;
844   MPI_Comm_rank(comm,&b->rank);
845   MPI_Comm_size(comm,&b->size);
846 
847   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
848     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
849   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,"MatCreateMPIBAIJ: either M or m should be specified");
850   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,"MatCreateMPIBAIJ: either N or n should be specified");
851   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
852   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
853 
854   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
855     work[0] = m; work[1] = n;
856     mbs = m/bs; nbs = n/bs;
857     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
858     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
859     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
860   }
861   if (m == PETSC_DECIDE) {
862     Mbs = M/bs;
863     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
864     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
865     m   = mbs*bs;
866   }
867   if (n == PETSC_DECIDE) {
868     Nbs = N/bs;
869     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
870     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
871     n   = nbs*bs;
872   }
873   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
874 
875   b->m = m; B->m = m;
876   b->n = n; B->n = n;
877   b->N = N; B->N = N;
878   b->M = M; B->M = M;
879   b->bs  = bs;
880   b->bs2 = bs*bs;
881   b->mbs = mbs;
882   b->nbs = nbs;
883   b->Mbs = Mbs;
884   b->Nbs = Nbs;
885 
886   /* build local table of row and column ownerships */
887   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
888   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
889   b->cowners = b->rowners + b->size + 1;
890   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
891   b->rowners[0] = 0;
892   for ( i=2; i<=b->size; i++ ) {
893     b->rowners[i] += b->rowners[i-1];
894   }
895   b->rstart = b->rowners[b->rank];
896   b->rend   = b->rowners[b->rank+1];
897   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
898   b->cowners[0] = 0;
899   for ( i=2; i<=b->size; i++ ) {
900     b->cowners[i] += b->cowners[i-1];
901   }
902   b->cstart = b->cowners[b->rank];
903   b->cend   = b->cowners[b->rank+1];
904 
905   if (d_nz == PETSC_DEFAULT) d_nz = 5;
906   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
907   PLogObjectParent(B,b->A);
908   if (o_nz == PETSC_DEFAULT) o_nz = 0;
909   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
910   PLogObjectParent(B,b->B);
911 
912   /* build cache for off array entries formed */
913   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
914   b->colmap      = 0;
915   b->garray      = 0;
916   b->roworiented = 1;
917 
918   /* stuff used for matrix vector multiply */
919   b->lvec      = 0;
920   b->Mvctx     = 0;
921 
922   /* stuff for MatGetRow() */
923   b->rowindices   = 0;
924   b->rowvalues    = 0;
925   b->getrowactive = PETSC_FALSE;
926 
927   *A = B;
928   return 0;
929 }
930 
931 #include "sys.h"
932 
933 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
934 {
935   Mat          A;
936   int          i, nz, ierr, j,rstart, rend, fd;
937   Scalar       *vals,*buf;
938   MPI_Comm     comm = ((PetscObject)viewer)->comm;
939   MPI_Status   status;
940   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
941   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
942   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
943   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
944   int          dcount,kmax,k,nzcount,tmp;
945 
946 
947   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
948   bs2  = bs*bs;
949 
950   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
951   if (!rank) {
952     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
953     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
954     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIBAIJ:not matrix object");
955   }
956 
957   MPI_Bcast(header+1,3,MPI_INT,0,comm);
958   M = header[1]; N = header[2];
959 
960 
961   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
962 
963   /*
964      This code adds extra rows to make sure the number of rows is
965      divisible by the blocksize
966   */
967   Mbs        = M/bs;
968   extra_rows = bs - M + bs*(Mbs);
969   if (extra_rows == bs) extra_rows = 0;
970   else                  Mbs++;
971   if (extra_rows &&!rank) {
972     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
973   }
974   /* determine ownership of all rows */
975   mbs = Mbs/size + ((Mbs % size) > rank);
976   m   = mbs * bs;
977   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
978   browners = rowners + size + 1;
979   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
980   rowners[0] = 0;
981   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
982   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
983   rstart = rowners[rank];
984   rend   = rowners[rank+1];
985 
986   /* distribute row lengths to all processors */
987   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
988   if (!rank) {
989     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
990     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
991     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
992     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
993     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
994     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
995     PetscFree(sndcounts);
996   }
997   else {
998     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
999   }
1000 
1001   if (!rank) {
1002     /* calculate the number of nonzeros on each processor */
1003     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1004     PetscMemzero(procsnz,size*sizeof(int));
1005     for ( i=0; i<size; i++ ) {
1006       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
1007         procsnz[i] += rowlengths[j];
1008       }
1009     }
1010     PetscFree(rowlengths);
1011 
1012     /* determine max buffer needed and allocate it */
1013     maxnz = 0;
1014     for ( i=0; i<size; i++ ) {
1015       maxnz = PetscMax(maxnz,procsnz[i]);
1016     }
1017     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1018 
1019     /* read in my part of the matrix column indices  */
1020     nz = procsnz[0];
1021     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
1022     mycols = ibuf;
1023     if (size == 1)  nz -= extra_rows;
1024     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1025     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1026 
1027     /* read in every ones (except the last) and ship off */
1028     for ( i=1; i<size-1; i++ ) {
1029       nz = procsnz[i];
1030       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1031       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1032     }
1033     /* read in the stuff for the last proc */
1034     if ( size != 1 ) {
1035       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1036       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1037       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1038       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1039     }
1040     PetscFree(cols);
1041   }
1042   else {
1043     /* determine buffer space needed for message */
1044     nz = 0;
1045     for ( i=0; i<m; i++ ) {
1046       nz += locrowlens[i];
1047     }
1048     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
1049     mycols = ibuf;
1050     /* receive message of column indices*/
1051     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1052     MPI_Get_count(&status,MPI_INT,&maxnz);
1053     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
1054   }
1055 
1056   /* loop over local rows, determining number of off diagonal entries */
1057   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1058   odlens = dlens + (rend-rstart);
1059   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1060   PetscMemzero(mask,3*Mbs*sizeof(int));
1061   masked1 = mask    + Mbs;
1062   masked2 = masked1 + Mbs;
1063   rowcount = 0; nzcount = 0;
1064   for ( i=0; i<mbs; i++ ) {
1065     dcount  = 0;
1066     odcount = 0;
1067     for ( j=0; j<bs; j++ ) {
1068       kmax = locrowlens[rowcount];
1069       for ( k=0; k<kmax; k++ ) {
1070         tmp = mycols[nzcount++]/bs;
1071         if (!mask[tmp]) {
1072           mask[tmp] = 1;
1073           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
1074           else masked1[dcount++] = tmp;
1075         }
1076       }
1077       rowcount++;
1078     }
1079 
1080     dlens[i]  = dcount;
1081     odlens[i] = odcount;
1082 
1083     /* zero out the mask elements we set */
1084     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
1085     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
1086   }
1087 
1088   /* create our matrix */
1089   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
1090   A = *newmat;
1091   MatSetOption(A,COLUMNS_SORTED);
1092 
1093   if (!rank) {
1094     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
1095     /* read in my part of the matrix numerical values  */
1096     nz = procsnz[0];
1097     vals = buf;
1098     mycols = ibuf;
1099     if (size == 1)  nz -= extra_rows;
1100     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1101     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1102     /* insert into matrix */
1103     jj      = rstart*bs;
1104     for ( i=0; i<m; i++ ) {
1105       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
1106       mycols += locrowlens[i];
1107       vals   += locrowlens[i];
1108       jj++;
1109     }
1110     /* read in other processors( except the last one) and ship out */
1111     for ( i=1; i<size-1; i++ ) {
1112       nz = procsnz[i];
1113       vals = buf;
1114       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1115       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1116     }
1117     /* the last proc */
1118     if ( size != 1 ){
1119       nz = procsnz[i] - extra_rows;
1120       vals = buf;
1121       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1122       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1123       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1124     }
1125     PetscFree(procsnz);
1126   }
1127   else {
1128     /* receive numeric values */
1129     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
1130 
1131     /* receive message of values*/
1132     vals = buf;
1133     mycols = ibuf;
1134     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1135     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1136     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIBAIJ:something is wrong with file");
1137 
1138     /* insert into matrix */
1139     jj      = rstart*bs;
1140     for ( i=0; i<m; i++ ) {
1141       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
1142       mycols += locrowlens[i];
1143       vals   += locrowlens[i];
1144       jj++;
1145     }
1146   }
1147   PetscFree(locrowlens);
1148   PetscFree(buf);
1149   PetscFree(ibuf);
1150   PetscFree(rowners);
1151   PetscFree(dlens);
1152   PetscFree(mask);
1153   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1154   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1155   return 0;
1156 }
1157 
1158 
1159