xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 57b952d6c22e98ea0d8a99e8978d81f652deb08e)
179bdfe76SSatish Balay #ifndef lint
2*57b952d6SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.2 1996/06/04 21:40:25 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
579bdfe76SSatish Balay #include "mpibaij.h"
679bdfe76SSatish Balay 
779bdfe76SSatish Balay 
8*57b952d6SSatish Balay #include "draw.h"
9*57b952d6SSatish Balay #include "pinclude/pviewer.h"
10*57b952d6SSatish Balay 
11*57b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
12*57b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13*57b952d6SSatish Balay 
14*57b952d6SSatish Balay /* local utility routine that creates a mapping from the global column
15*57b952d6SSatish Balay number to the local number in the off-diagonal part of the local
16*57b952d6SSatish Balay storage of the matrix.  This is done in a non scable way since the
17*57b952d6SSatish Balay length of colmap equals the global matrix length.
18*57b952d6SSatish Balay */
19*57b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
20*57b952d6SSatish Balay {
21*57b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22*57b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
23*57b952d6SSatish Balay   int        nbs = B->nbs,i;
24*57b952d6SSatish Balay 
25*57b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
26*57b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
27*57b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
28*57b952d6SSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i;
29*57b952d6SSatish Balay   return 0;
30*57b952d6SSatish Balay }
31*57b952d6SSatish Balay 
32*57b952d6SSatish Balay 
33*57b952d6SSatish Balay static int MatGetReordering_MPIBAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
34*57b952d6SSatish Balay {
35*57b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
36*57b952d6SSatish Balay   int         ierr;
37*57b952d6SSatish Balay   if (baij->size == 1) {
38*57b952d6SSatish Balay     ierr = MatGetReordering(baij->A,type,rperm,cperm); CHKERRQ(ierr);
39*57b952d6SSatish Balay   } else SETERRQ(1,"MatGetReordering_MPIBAIJ:not supported in parallel");
40*57b952d6SSatish Balay   return 0;
41*57b952d6SSatish Balay }
42*57b952d6SSatish Balay 
43*57b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
44*57b952d6SSatish Balay {
45*57b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
46*57b952d6SSatish Balay   Scalar      value;
47*57b952d6SSatish Balay   int         ierr,i,j, rstart = baij->rstart, rend = baij->rend;
48*57b952d6SSatish Balay   int         cstart = baij->cstart, cend = baij->cend,row,col;
49*57b952d6SSatish Balay   int         roworiented = baij->roworiented,rstart_orig,rend_orig;
50*57b952d6SSatish Balay   int         cstart_orig,cend_orig,bs=baij->bs;
51*57b952d6SSatish Balay 
52*57b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
53*57b952d6SSatish Balay     SETERRQ(1,"MatSetValues_MPIBAIJ:Cannot mix inserts and adds");
54*57b952d6SSatish Balay   }
55*57b952d6SSatish Balay   baij->insertmode = addv;
56*57b952d6SSatish Balay   rstart_orig = rstart*bs;
57*57b952d6SSatish Balay   rend_orig   = rend*bs;
58*57b952d6SSatish Balay   cstart_orig = cstart*bs;
59*57b952d6SSatish Balay   cend_orig   = cend*bs;
60*57b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
61*57b952d6SSatish Balay     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative row");
62*57b952d6SSatish Balay     if (im[i] >= baij->M) SETERRQ(1,"MatSetValues_MPIBAIJ:Row too large");
63*57b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
64*57b952d6SSatish Balay       row = im[i] - rstart_orig;
65*57b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
66*57b952d6SSatish Balay         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIBAIJ:Negative column");
67*57b952d6SSatish Balay         if (in[j] >= baij->N) SETERRQ(1,"MatSetValues_MPIBAIJ:Col too large");
68*57b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
69*57b952d6SSatish Balay           col = in[j] - cstart_orig;
70*57b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
71*57b952d6SSatish Balay           ierr = MatSetValues(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
72*57b952d6SSatish Balay         }
73*57b952d6SSatish Balay         else {
74*57b952d6SSatish Balay           if (mat->was_assembled) {
75*57b952d6SSatish Balay             if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);}
76*57b952d6SSatish Balay             col = baij->colmap[in[j]];
77*57b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
78*57b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
79*57b952d6SSatish Balay               col =  in[j];
80*57b952d6SSatish Balay             }
81*57b952d6SSatish Balay           }
82*57b952d6SSatish Balay           else col = in[j];
83*57b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
84*57b952d6SSatish Balay           ierr = MatSetValues(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
85*57b952d6SSatish Balay         }
86*57b952d6SSatish Balay       }
87*57b952d6SSatish Balay     }
88*57b952d6SSatish Balay     else {
89*57b952d6SSatish Balay       if (roworiented) {
90*57b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
91*57b952d6SSatish Balay       }
92*57b952d6SSatish Balay       else {
93*57b952d6SSatish Balay         row = im[i];
94*57b952d6SSatish Balay         for ( j=0; j<n; j++ ) {
95*57b952d6SSatish Balay           ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
96*57b952d6SSatish Balay         }
97*57b952d6SSatish Balay       }
98*57b952d6SSatish Balay     }
99*57b952d6SSatish Balay   }
100*57b952d6SSatish Balay   return 0;
101*57b952d6SSatish Balay }
102*57b952d6SSatish Balay 
103*57b952d6SSatish Balay 
104*57b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
105*57b952d6SSatish Balay {
106*57b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
107*57b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
108*57b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
109*57b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
110*57b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
111*57b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
112*57b952d6SSatish Balay   InsertMode  addv;
113*57b952d6SSatish Balay   Scalar      *rvalues,*svalues;
114*57b952d6SSatish Balay 
115*57b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
116*57b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
117*57b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
118*57b952d6SSatish Balay     SETERRQ(1,"MatAssemblyBegin_MPIBAIJ:Some processors inserted others added");
119*57b952d6SSatish Balay   }
120*57b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
121*57b952d6SSatish Balay 
122*57b952d6SSatish Balay   /*  first count number of contributors to each processor */
123*57b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
124*57b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
125*57b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
126*57b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
127*57b952d6SSatish Balay     idx = baij->stash.idx[i];
128*57b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
129*57b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
130*57b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
131*57b952d6SSatish Balay       }
132*57b952d6SSatish Balay     }
133*57b952d6SSatish Balay   }
134*57b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
135*57b952d6SSatish Balay 
136*57b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
137*57b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
138*57b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
139*57b952d6SSatish Balay   nreceives = work[rank];
140*57b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
141*57b952d6SSatish Balay   nmax = work[rank];
142*57b952d6SSatish Balay   PetscFree(work);
143*57b952d6SSatish Balay 
144*57b952d6SSatish Balay   /* post receives:
145*57b952d6SSatish Balay        1) each message will consist of ordered pairs
146*57b952d6SSatish Balay      (global index,value) we store the global index as a double
147*57b952d6SSatish Balay      to simplify the message passing.
148*57b952d6SSatish Balay        2) since we don't know how long each individual message is we
149*57b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
150*57b952d6SSatish Balay      this is a lot of wasted space.
151*57b952d6SSatish Balay 
152*57b952d6SSatish Balay 
153*57b952d6SSatish Balay        This could be done better.
154*57b952d6SSatish Balay   */
155*57b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
156*57b952d6SSatish Balay   CHKPTRQ(rvalues);
157*57b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
158*57b952d6SSatish Balay   CHKPTRQ(recv_waits);
159*57b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
160*57b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
161*57b952d6SSatish Balay               comm,recv_waits+i);
162*57b952d6SSatish Balay   }
163*57b952d6SSatish Balay 
164*57b952d6SSatish Balay   /* do sends:
165*57b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
166*57b952d6SSatish Balay          the ith processor
167*57b952d6SSatish Balay   */
168*57b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
169*57b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
170*57b952d6SSatish Balay   CHKPTRQ(send_waits);
171*57b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
172*57b952d6SSatish Balay   starts[0] = 0;
173*57b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
174*57b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
175*57b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
176*57b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
177*57b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
178*57b952d6SSatish Balay   }
179*57b952d6SSatish Balay   PetscFree(owner);
180*57b952d6SSatish Balay   starts[0] = 0;
181*57b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
182*57b952d6SSatish Balay   count = 0;
183*57b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
184*57b952d6SSatish Balay     if (procs[i]) {
185*57b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
186*57b952d6SSatish Balay                 comm,send_waits+count++);
187*57b952d6SSatish Balay     }
188*57b952d6SSatish Balay   }
189*57b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
190*57b952d6SSatish Balay 
191*57b952d6SSatish Balay   /* Free cache space */
192*57b952d6SSatish Balay   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off processor values %d\n",rank,baij->stash.n);
193*57b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
194*57b952d6SSatish Balay 
195*57b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
196*57b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
197*57b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
198*57b952d6SSatish Balay   baij->rmax       = nmax;
199*57b952d6SSatish Balay 
200*57b952d6SSatish Balay   return 0;
201*57b952d6SSatish Balay }
202*57b952d6SSatish Balay 
203*57b952d6SSatish Balay 
204*57b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
205*57b952d6SSatish Balay {
206*57b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
207*57b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
208*57b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
209*57b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
210*57b952d6SSatish Balay   Scalar      *values,val;
211*57b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
212*57b952d6SSatish Balay 
213*57b952d6SSatish Balay   /*  wait on receives */
214*57b952d6SSatish Balay   while (count) {
215*57b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
216*57b952d6SSatish Balay     /* unpack receives into our local space */
217*57b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
218*57b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
219*57b952d6SSatish Balay     n = n/3;
220*57b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
221*57b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
222*57b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
223*57b952d6SSatish Balay       val = values[3*i+2];
224*57b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
225*57b952d6SSatish Balay         col -= baij->cstart*bs;
226*57b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
227*57b952d6SSatish Balay       }
228*57b952d6SSatish Balay       else {
229*57b952d6SSatish Balay         if (mat->was_assembled) {
230*57b952d6SSatish Balay           if (!baij->colmap) {ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);}
231*57b952d6SSatish Balay           col = baij->colmap[col/bs]*bs + col%bs;
232*57b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
233*57b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
234*57b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
235*57b952d6SSatish Balay           }
236*57b952d6SSatish Balay         }
237*57b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
238*57b952d6SSatish Balay       }
239*57b952d6SSatish Balay     }
240*57b952d6SSatish Balay     count--;
241*57b952d6SSatish Balay   }
242*57b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
243*57b952d6SSatish Balay 
244*57b952d6SSatish Balay   /* wait on sends */
245*57b952d6SSatish Balay   if (baij->nsends) {
246*57b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
247*57b952d6SSatish Balay     CHKPTRQ(send_status);
248*57b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
249*57b952d6SSatish Balay     PetscFree(send_status);
250*57b952d6SSatish Balay   }
251*57b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
252*57b952d6SSatish Balay 
253*57b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
254*57b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
255*57b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
256*57b952d6SSatish Balay 
257*57b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
258*57b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
259*57b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
260*57b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
261*57b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
262*57b952d6SSatish Balay   }
263*57b952d6SSatish Balay 
264*57b952d6SSatish Balay   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
265*57b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
266*57b952d6SSatish Balay   }
267*57b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
268*57b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
269*57b952d6SSatish Balay 
270*57b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
271*57b952d6SSatish Balay   return 0;
272*57b952d6SSatish Balay }
273*57b952d6SSatish Balay 
274*57b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
275*57b952d6SSatish Balay {
276*57b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
277*57b952d6SSatish Balay   int          ierr;
278*57b952d6SSatish Balay 
279*57b952d6SSatish Balay   if (baij->size == 1) {
280*57b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
281*57b952d6SSatish Balay   }
282*57b952d6SSatish Balay   else SETERRQ(1,"MatView_MPIBAIJ_Binary:Only uniprocessor output supported");
283*57b952d6SSatish Balay   return 0;
284*57b952d6SSatish Balay }
285*57b952d6SSatish Balay 
286*57b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
287*57b952d6SSatish Balay {
288*57b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
289*57b952d6SSatish Balay   int          ierr, format,rank,bs=baij->bs,bs2=baij->bs2;
290*57b952d6SSatish Balay   FILE         *fd;
291*57b952d6SSatish Balay   ViewerType   vtype;
292*57b952d6SSatish Balay 
293*57b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
294*57b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
295*57b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
296*57b952d6SSatish Balay     if (format == ASCII_FORMAT_INFO_DETAILED) {
297*57b952d6SSatish Balay       int nz, nzalloc, mem;
298*57b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
299*57b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
300*57b952d6SSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
301*57b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
302*57b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
303*57b952d6SSatish Balay               rank,baij->m,nz*bs,nzalloc*bs,baij->bs,mem);
304*57b952d6SSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
305*57b952d6SSatish Balay       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz*bs);
306*57b952d6SSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
307*57b952d6SSatish Balay       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz*bs);
308*57b952d6SSatish Balay       fflush(fd);
309*57b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
310*57b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
311*57b952d6SSatish Balay       return 0;
312*57b952d6SSatish Balay     }
313*57b952d6SSatish Balay     else if (format == ASCII_FORMAT_INFO) {
314*57b952d6SSatish Balay       return 0;
315*57b952d6SSatish Balay     }
316*57b952d6SSatish Balay   }
317*57b952d6SSatish Balay 
318*57b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
319*57b952d6SSatish Balay     Draw       draw;
320*57b952d6SSatish Balay     PetscTruth isnull;
321*57b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
322*57b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
323*57b952d6SSatish Balay   }
324*57b952d6SSatish Balay 
325*57b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
326*57b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
327*57b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
328*57b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
329*57b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
330*57b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
331*57b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
332*57b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
333*57b952d6SSatish Balay     fflush(fd);
334*57b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
335*57b952d6SSatish Balay   }
336*57b952d6SSatish Balay   else {
337*57b952d6SSatish Balay     int size = baij->size;
338*57b952d6SSatish Balay     rank = baij->rank;
339*57b952d6SSatish Balay     if (size == 1) {
340*57b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
341*57b952d6SSatish Balay     }
342*57b952d6SSatish Balay     else {
343*57b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
344*57b952d6SSatish Balay       Mat         A;
345*57b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
346*57b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
347*57b952d6SSatish Balay       int         mbs=baij->mbs;
348*57b952d6SSatish Balay       Scalar      *a;
349*57b952d6SSatish Balay 
350*57b952d6SSatish Balay       if (!rank) {
351*57b952d6SSatish Balay         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
352*57b952d6SSatish Balay                CHKERRQ(ierr);
353*57b952d6SSatish Balay       }
354*57b952d6SSatish Balay       else {
355*57b952d6SSatish Balay         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
356*57b952d6SSatish Balay                CHKERRQ(ierr);
357*57b952d6SSatish Balay       }
358*57b952d6SSatish Balay       PLogObjectParent(mat,A);
359*57b952d6SSatish Balay 
360*57b952d6SSatish Balay       /* copy over the A part */
361*57b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
362*57b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
363*57b952d6SSatish Balay       row = baij->rstart;
364*57b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
365*57b952d6SSatish Balay 
366*57b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
367*57b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
368*57b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
369*57b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
370*57b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
371*57b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
372*57b952d6SSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
373*57b952d6SSatish Balay             col++;
374*57b952d6SSatish Balay           }
375*57b952d6SSatish Balay         }
376*57b952d6SSatish Balay       }
377*57b952d6SSatish Balay       /* copy over the B part */
378*57b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
379*57b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
380*57b952d6SSatish Balay       row = baij->rstart*bs;
381*57b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
382*57b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
383*57b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
384*57b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
385*57b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
386*57b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
387*57b952d6SSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
388*57b952d6SSatish Balay             col++;
389*57b952d6SSatish Balay           }
390*57b952d6SSatish Balay         }
391*57b952d6SSatish Balay       }
392*57b952d6SSatish Balay       PetscFree(rvals);
393*57b952d6SSatish Balay       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
394*57b952d6SSatish Balay       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
395*57b952d6SSatish Balay       if (!rank) {
396*57b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
397*57b952d6SSatish Balay       }
398*57b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
399*57b952d6SSatish Balay     }
400*57b952d6SSatish Balay   }
401*57b952d6SSatish Balay   return 0;
402*57b952d6SSatish Balay }
403*57b952d6SSatish Balay 
404*57b952d6SSatish Balay 
405*57b952d6SSatish Balay 
406*57b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
407*57b952d6SSatish Balay {
408*57b952d6SSatish Balay   Mat         mat = (Mat) obj;
409*57b952d6SSatish Balay   int         ierr;
410*57b952d6SSatish Balay   ViewerType  vtype;
411*57b952d6SSatish Balay 
412*57b952d6SSatish Balay   if (!viewer) {
413*57b952d6SSatish Balay     viewer = STDOUT_VIEWER_SELF;
414*57b952d6SSatish Balay   }
415*57b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
416*57b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
417*57b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
418*57b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
419*57b952d6SSatish Balay   }
420*57b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
421*57b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
422*57b952d6SSatish Balay   }
423*57b952d6SSatish Balay   return 0;
424*57b952d6SSatish Balay }
425*57b952d6SSatish Balay 
42679bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
42779bdfe76SSatish Balay {
42879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
42979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
43079bdfe76SSatish Balay   int         ierr;
43179bdfe76SSatish Balay 
43279bdfe76SSatish Balay #if defined(PETSC_LOG)
43379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
43479bdfe76SSatish Balay #endif
43579bdfe76SSatish Balay 
43679bdfe76SSatish Balay   PetscFree(baij->rowners);
43779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
43879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
43979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
44079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
44179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
44279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
44379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
44479bdfe76SSatish Balay   PetscFree(baij);
44579bdfe76SSatish Balay   PLogObjectDestroy(mat);
44679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
44779bdfe76SSatish Balay   return 0;
44879bdfe76SSatish Balay }
44979bdfe76SSatish Balay 
450*57b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
451*57b952d6SSatish Balay {
452*57b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
453*57b952d6SSatish Balay   *m = mat->M; *n = mat->N;
454*57b952d6SSatish Balay   return 0;
455*57b952d6SSatish Balay }
456*57b952d6SSatish Balay 
457*57b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
458*57b952d6SSatish Balay {
459*57b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
460*57b952d6SSatish Balay   *m = mat->m; *n = mat->N;
461*57b952d6SSatish Balay   return 0;
462*57b952d6SSatish Balay }
463*57b952d6SSatish Balay 
464*57b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
465*57b952d6SSatish Balay {
466*57b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
467*57b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
468*57b952d6SSatish Balay   return 0;
469*57b952d6SSatish Balay }
470*57b952d6SSatish Balay 
47179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
47279bdfe76SSatish Balay static struct _MatOps MatOps = {
473*57b952d6SSatish Balay   MatSetValues_MPIBAIJ,0,0,0,
47479bdfe76SSatish Balay   0,0,0,0,
47579bdfe76SSatish Balay   0,0,0,0,
47679bdfe76SSatish Balay   0,0,0,0,
47779bdfe76SSatish Balay   0,0,0,0,
478*57b952d6SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,0,
479*57b952d6SSatish Balay   0,0,MatGetReordering_MPIBAIJ,0,
480*57b952d6SSatish Balay   0,0,0,MatGetSize_MPIBAIJ,
481*57b952d6SSatish Balay   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
48279bdfe76SSatish Balay   0,0,0,0,
48379bdfe76SSatish Balay   0,0,0,0,
48479bdfe76SSatish Balay   0,0,0,0,
48579bdfe76SSatish Balay   0,0,0,0,
48679bdfe76SSatish Balay   0};
48779bdfe76SSatish Balay 
48879bdfe76SSatish Balay 
48979bdfe76SSatish Balay /*@C
49079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
49179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
49279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
49379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
49479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
49579bdfe76SSatish Balay 
49679bdfe76SSatish Balay    Input Parameters:
49779bdfe76SSatish Balay .  comm - MPI communicator
49879bdfe76SSatish Balay .  bs   - size of blockk
49979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
50079bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
50179bdfe76SSatish Balay            if N is given)
50279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
50379bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
50479bdfe76SSatish Balay            if n is given)
50579bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
50679bdfe76SSatish Balay            submatrix  (same for all local rows)
50779bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
50879bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
50979bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
51079bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
51179bdfe76SSatish Balay            submatrix (same for all local rows).
51279bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
51379bdfe76SSatish Balay            submatrix or null (possibly different for each row).
51479bdfe76SSatish Balay 
51579bdfe76SSatish Balay    Output Parameter:
51679bdfe76SSatish Balay .  A - the matrix
51779bdfe76SSatish Balay 
51879bdfe76SSatish Balay    Notes:
51979bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
52079bdfe76SSatish Balay    (possibly both).
52179bdfe76SSatish Balay 
52279bdfe76SSatish Balay    Storage Information:
52379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
52479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
52579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
52679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
52779bdfe76SSatish Balay 
52879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
52979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
53079bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
53179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
53279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
53379bdfe76SSatish Balay 
53479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
53579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
53679bdfe76SSatish Balay 
53779bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
53879bdfe76SSatish Balay $         -------------------
53979bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
54079bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
54179bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
54279bdfe76SSatish Balay $         -------------------
54379bdfe76SSatish Balay $
54479bdfe76SSatish Balay 
54579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
54679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
54779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
548*57b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
54979bdfe76SSatish Balay 
55079bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
55179bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
55279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
55379bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
55479bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
55579bdfe76SSatish Balay 
55679bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
55779bdfe76SSatish Balay 
55879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
55979bdfe76SSatish Balay @*/
56079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
56179bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
56279bdfe76SSatish Balay {
56379bdfe76SSatish Balay   Mat          B;
56479bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
56579bdfe76SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs,Nbs;
56679bdfe76SSatish Balay 
56779bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
56879bdfe76SSatish Balay   *A = 0;
56979bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
57079bdfe76SSatish Balay   PLogObjectCreate(B);
57179bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
57279bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
57379bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
57479bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
57579bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
576*57b952d6SSatish Balay 
57779bdfe76SSatish Balay   B->factor     = 0;
57879bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
57979bdfe76SSatish Balay 
58079bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
58179bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
58279bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
58379bdfe76SSatish Balay 
58479bdfe76SSatish Balay   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
585*57b952d6SSatish Balay     SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
58679bdfe76SSatish Balay 
58779bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
58879bdfe76SSatish Balay     work[0] = m; work[1] = n;
58979bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
59079bdfe76SSatish Balay     if (mbs*bs != m || nbs*n != nbs) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
59179bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
59279bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
59379bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
59479bdfe76SSatish Balay   }
59579bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
59679bdfe76SSatish Balay     Mbs = M/bs;
59779bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
59879bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
59979bdfe76SSatish Balay     m   = mbs*bs;
60079bdfe76SSatish Balay   }
60179bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
60279bdfe76SSatish Balay     Nbs = N/bs;
60379bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
60479bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
60579bdfe76SSatish Balay     n   = nbs*bs;
60679bdfe76SSatish Balay   }
60779bdfe76SSatish Balay 
60879bdfe76SSatish Balay   b->m = m; B->m = m;
60979bdfe76SSatish Balay   b->n = n; B->n = n;
61079bdfe76SSatish Balay   b->N = N; B->N = N;
61179bdfe76SSatish Balay   b->M = M; B->M = M;
61279bdfe76SSatish Balay   b->bs  = bs;
61379bdfe76SSatish Balay   b->bs2 = bs*bs;
61479bdfe76SSatish Balay   b->mbs = mbs;
61579bdfe76SSatish Balay   b->nbs = nbs;
61679bdfe76SSatish Balay   b->Mbs = Mbs;
61779bdfe76SSatish Balay   b->Nbs = Nbs;
61879bdfe76SSatish Balay 
61979bdfe76SSatish Balay   /* build local table of row and column ownerships */
62079bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
62179bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
62279bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
62379bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
62479bdfe76SSatish Balay   b->rowners[0] = 0;
62579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
62679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
62779bdfe76SSatish Balay   }
62879bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
62979bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
63079bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
63179bdfe76SSatish Balay   b->cowners[0] = 0;
63279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
63379bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
63479bdfe76SSatish Balay   }
63579bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
63679bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
63779bdfe76SSatish Balay 
63879bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
63979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
64079bdfe76SSatish Balay   PLogObjectParent(B,b->A);
64179bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
64279bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
64379bdfe76SSatish Balay   PLogObjectParent(B,b->B);
64479bdfe76SSatish Balay 
64579bdfe76SSatish Balay   /* build cache for off array entries formed */
64679bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
64779bdfe76SSatish Balay   b->colmap      = 0;
64879bdfe76SSatish Balay   b->garray      = 0;
64979bdfe76SSatish Balay   b->roworiented = 1;
65079bdfe76SSatish Balay 
65179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
65279bdfe76SSatish Balay   b->lvec      = 0;
65379bdfe76SSatish Balay   b->Mvctx     = 0;
65479bdfe76SSatish Balay 
65579bdfe76SSatish Balay   /* stuff for MatGetRow() */
65679bdfe76SSatish Balay   b->rowindices   = 0;
65779bdfe76SSatish Balay   b->rowvalues    = 0;
65879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
65979bdfe76SSatish Balay 
66079bdfe76SSatish Balay   *A = B;
66179bdfe76SSatish Balay   return 0;
66279bdfe76SSatish Balay }
663*57b952d6SSatish Balay 
664*57b952d6SSatish Balay #include "sys.h"
665*57b952d6SSatish Balay 
666*57b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
667*57b952d6SSatish Balay {
668*57b952d6SSatish Balay   Mat          A;
669*57b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
670*57b952d6SSatish Balay   Scalar       *vals,*buf;
671*57b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
672*57b952d6SSatish Balay   MPI_Status   status;
673*57b952d6SSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
674*57b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
675*57b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
676*57b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
677*57b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
678*57b952d6SSatish Balay 
679*57b952d6SSatish Balay 
680*57b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
681*57b952d6SSatish Balay   bs2  = bs*bs;
682*57b952d6SSatish Balay 
683*57b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
684*57b952d6SSatish Balay   if (!rank) {
685*57b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
686*57b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
687*57b952d6SSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
688*57b952d6SSatish Balay   }
689*57b952d6SSatish Balay 
690*57b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
691*57b952d6SSatish Balay   M = header[1]; N = header[2];
692*57b952d6SSatish Balay 
693*57b952d6SSatish Balay 
694*57b952d6SSatish Balay   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
695*57b952d6SSatish Balay 
696*57b952d6SSatish Balay   /*
697*57b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
698*57b952d6SSatish Balay      divisible by the blocksize
699*57b952d6SSatish Balay   */
700*57b952d6SSatish Balay   Mbs        = M/bs;
701*57b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
702*57b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
703*57b952d6SSatish Balay   else                  Mbs++;
704*57b952d6SSatish Balay   if (extra_rows &&!rank) {
705*57b952d6SSatish Balay     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize");
706*57b952d6SSatish Balay   }
707*57b952d6SSatish Balay   /* determine ownership of all rows */
708*57b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
709*57b952d6SSatish Balay   m   = mbs * bs;
710*57b952d6SSatish Balay   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
711*57b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
712*57b952d6SSatish Balay   rowners[0] = 0;
713*57b952d6SSatish Balay   for ( i=2; i<=size; i++ ) {
714*57b952d6SSatish Balay     rowners[i] += rowners[i-1];
715*57b952d6SSatish Balay   }
716*57b952d6SSatish Balay   rstart = rowners[rank];
717*57b952d6SSatish Balay   rend   = rowners[rank+1];
718*57b952d6SSatish Balay 
719*57b952d6SSatish Balay   /* distribute row lengths to all processors */
720*57b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
721*57b952d6SSatish Balay   if (!rank) {
722*57b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
723*57b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
724*57b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
725*57b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
726*57b952d6SSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = (rowners[i+1] - rowners[i])*bs;
727*57b952d6SSatish Balay     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
728*57b952d6SSatish Balay     PetscFree(sndcounts);
729*57b952d6SSatish Balay   }
730*57b952d6SSatish Balay   else {
731*57b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
732*57b952d6SSatish Balay   }
733*57b952d6SSatish Balay 
734*57b952d6SSatish Balay   if (!rank) {
735*57b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
736*57b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
737*57b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
738*57b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
739*57b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
740*57b952d6SSatish Balay         procsnz[i] += rowlengths[j];
741*57b952d6SSatish Balay       }
742*57b952d6SSatish Balay     }
743*57b952d6SSatish Balay     PetscFree(rowlengths);
744*57b952d6SSatish Balay 
745*57b952d6SSatish Balay     /* determine max buffer needed and allocate it */
746*57b952d6SSatish Balay     maxnz = 0;
747*57b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
748*57b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
749*57b952d6SSatish Balay     }
750*57b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
751*57b952d6SSatish Balay 
752*57b952d6SSatish Balay     /* read in my part of the matrix column indices  */
753*57b952d6SSatish Balay     nz = procsnz[0];
754*57b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
755*57b952d6SSatish Balay     mycols = ibuf;
756*57b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
757*57b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
758*57b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
759*57b952d6SSatish Balay       nz = procsnz[i];
760*57b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
761*57b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
762*57b952d6SSatish Balay     }
763*57b952d6SSatish Balay     /* read in the stuff for the last proc */
764*57b952d6SSatish Balay     if ( size != 1 ) {
765*57b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
766*57b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
767*57b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
768*57b952d6SSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,i,tag,comm);
769*57b952d6SSatish Balay     }
770*57b952d6SSatish Balay     PetscFree(cols);
771*57b952d6SSatish Balay   }
772*57b952d6SSatish Balay   else {
773*57b952d6SSatish Balay     /* determine buffer space needed for message */
774*57b952d6SSatish Balay     nz = 0;
775*57b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
776*57b952d6SSatish Balay       nz += locrowlens[i];
777*57b952d6SSatish Balay     }
778*57b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
779*57b952d6SSatish Balay     mycols = ibuf;
780*57b952d6SSatish Balay     /* receive message of column indices*/
781*57b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
782*57b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
783*57b952d6SSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
784*57b952d6SSatish Balay   }
785*57b952d6SSatish Balay 
786*57b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
787*57b952d6SSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(dlens);
788*57b952d6SSatish Balay   odlens = dlens + (rstart-rend);
789*57b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
790*57b952d6SSatish Balay   PetscMemzero(mask,Mbs*sizeof(int));
791*57b952d6SSatish Balay   masked1 = mask    + Mbs;
792*57b952d6SSatish Balay   masked2 = masked1 + Mbs;
793*57b952d6SSatish Balay   rowcount = 0; nzcount = 0;
794*57b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
795*57b952d6SSatish Balay     dcount  = 0;
796*57b952d6SSatish Balay     odcount = 0;
797*57b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
798*57b952d6SSatish Balay       kmax = locrowlens[rowcount];
799*57b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
800*57b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
801*57b952d6SSatish Balay         if (!mask[tmp]) {
802*57b952d6SSatish Balay           mask[tmp] = 1;
803*57b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
804*57b952d6SSatish Balay           else masked1[dcount++] = tmp;
805*57b952d6SSatish Balay         }
806*57b952d6SSatish Balay       }
807*57b952d6SSatish Balay       rowcount++;
808*57b952d6SSatish Balay     }
809*57b952d6SSatish Balay     dlens[i]  = dcount;
810*57b952d6SSatish Balay     odlens[i] = odcount;
811*57b952d6SSatish Balay     /* zero out the mask elements we set */
812*57b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
813*57b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
814*57b952d6SSatish Balay   }
815*57b952d6SSatish Balay   /* create our matrix */
816*57b952d6SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
817*57b952d6SSatish Balay   A = *newmat;
818*57b952d6SSatish Balay   MatSetOption(A,COLUMNS_SORTED);
819*57b952d6SSatish Balay 
820*57b952d6SSatish Balay   if (!rank) {
821*57b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
822*57b952d6SSatish Balay 
823*57b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
824*57b952d6SSatish Balay     nz = procsnz[0];
825*57b952d6SSatish Balay     vals = buf;
826*57b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
827*57b952d6SSatish Balay 
828*57b952d6SSatish Balay     /* insert into matrix */
829*57b952d6SSatish Balay     jj      = rstart*bs;
830*57b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
831*57b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
832*57b952d6SSatish Balay       mycols += locrowlens[i];
833*57b952d6SSatish Balay       vals   += locrowlens[i];
834*57b952d6SSatish Balay       jj++;
835*57b952d6SSatish Balay     }
836*57b952d6SSatish Balay 
837*57b952d6SSatish Balay     /* read in other processors( except the last one) and ship out */
838*57b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
839*57b952d6SSatish Balay       nz = procsnz[i];
840*57b952d6SSatish Balay       vals = buf;
841*57b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
842*57b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
843*57b952d6SSatish Balay     }
844*57b952d6SSatish Balay     /* the last proc */
845*57b952d6SSatish Balay     if ( size != 1 ){
846*57b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
847*57b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
848*57b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
849*57b952d6SSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,i,A->tag,comm);
850*57b952d6SSatish Balay     }
851*57b952d6SSatish Balay     PetscFree(procsnz);
852*57b952d6SSatish Balay   }
853*57b952d6SSatish Balay   else {
854*57b952d6SSatish Balay     /* receive numeric values */
855*57b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
856*57b952d6SSatish Balay 
857*57b952d6SSatish Balay     /* receive message of values*/
858*57b952d6SSatish Balay     vals = buf;
859*57b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
860*57b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
861*57b952d6SSatish Balay     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
862*57b952d6SSatish Balay 
863*57b952d6SSatish Balay     /* insert into matrix */
864*57b952d6SSatish Balay     jj      = rstart*bs;
865*57b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
866*57b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
867*57b952d6SSatish Balay       mycols += locrowlens[i];
868*57b952d6SSatish Balay       vals   += locrowlens[i];
869*57b952d6SSatish Balay       jj++;
870*57b952d6SSatish Balay     }
871*57b952d6SSatish Balay   }
872*57b952d6SSatish Balay   PetscFree(locrowlens);
873*57b952d6SSatish Balay   PetscFree(buf);
874*57b952d6SSatish Balay   PetscFree(ibuf);
875*57b952d6SSatish Balay   PetscFree(rowners);
876*57b952d6SSatish Balay   PetscFree(dlens);
877*57b952d6SSatish Balay   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
878*57b952d6SSatish Balay   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
879*57b952d6SSatish Balay   return 0;
880*57b952d6SSatish Balay }
881*57b952d6SSatish Balay 
882*57b952d6SSatish Balay 
883