xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0a198c4c79a1f783fe701dd7a3eec31dea3d09ee)
1 
2 #ifndef lint
3 static char vcid[] = "$Id: mpiaij.c,v 1.170 1996/09/24 20:14:40 bsmith Exp bsmith $";
4 #endif
5 
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
10 /* local utility routine that creates a mapping from the global column
11 number to the local number in the off-diagonal part of the local
12 storage of the matrix.  This is done in a non-scalable way since the
13 length of colmap equals the global number of columns of the matrix.
14 */
15 int CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
18   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
19   int        n = B->n,i;
20 
21   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
22   PLogObjectMemory(mat,aij->N*sizeof(int));
23   PetscMemzero(aij->colmap,aij->N*sizeof(int));
24   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
25   return 0;
26 }
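/*
    A small worked example of the convention above (numbers are illustrative only):
    if the off-diagonal part references global columns garray = {3,7,12}, then
    colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, and every other entry is 0.
    Callers thus recover a local column with  lcol = colmap[gcol] - 1,  obtaining
    a negative value when the global column is not present in the off-diagonal part.
*/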
27 
28 extern int DisAssemble_MPIAIJ(Mat);
29 
30 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
31                            PetscTruth *done)
32 {
33   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
34   int        ierr;
35   if (aij->size == 1) {
36     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
37   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
38   return 0;
39 }
40 
41 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
42                                PetscTruth *done)
43 {
44   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
45   int        ierr;
46   if (aij->size == 1) {
47     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
48   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
49   return 0;
50 }
51 
52 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
53 
54 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
55 {
56   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
57   Scalar     value;
58   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
59   int        cstart = aij->cstart, cend = aij->cend,row,col;
60   int        roworiented = aij->roworiented;
61 
62 #if defined(PETSC_BOPT_g)
63   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
64     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
65   }
66 #endif
67   aij->insertmode = addv;
68   for ( i=0; i<m; i++ ) {
69 #if defined(PETSC_BOPT_g)
70     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
71     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
72 #endif
73     if (im[i] >= rstart && im[i] < rend) {
74       row = im[i] - rstart;
75       for ( j=0; j<n; j++ ) {
76         if (in[j] >= cstart && in[j] < cend){
77           col = in[j] - cstart;
78           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
79           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
80         }
81 #if defined(PETSC_BOPT_g)
82         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
83         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
84 #endif
85         else {
86           if (mat->was_assembled) {
87             if (!aij->colmap) {
88               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
89             }
90             col = aij->colmap[in[j]] - 1;
91             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
92               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
93               col =  in[j];
94             }
95           }
96           else col = in[j];
97           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
98           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
99         }
100       }
101     }
102     else {
103       if (roworiented) {
104         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
105       }
106       else {
107         row = im[i];
108         for ( j=0; j<n; j++ ) {
109           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
110         }
111       }
112     }
113   }
114   return 0;
115 }
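/*
    Summary of the routing above: an entry (im[i],in[j]) whose row is locally owned
    (rstart <= im[i] < rend) is stored in the diagonal block aij->A when the column
    is also locally owned (cstart <= in[j] < cend), and in the off-diagonal block
    aij->B otherwise; entries for rows owned by other processors are stashed and
    sent during MatAssemblyBegin()/MatAssemblyEnd().
*/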
116 
117 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
118 {
119   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
120   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
121   int        cstart = aij->cstart, cend = aij->cend,row,col;
122 
123   for ( i=0; i<m; i++ ) {
124     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
125     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
126     if (idxm[i] >= rstart && idxm[i] < rend) {
127       row = idxm[i] - rstart;
128       for ( j=0; j<n; j++ ) {
129         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
130         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
131         if (idxn[j] >= cstart && idxn[j] < cend){
132           col = idxn[j] - cstart;
133           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
134         }
135         else {
136           if (!aij->colmap) {
137             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
138           }
139           col = aij->colmap[idxn[j]] - 1;
140           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
141           else {
142             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
143           }
144         }
145       }
146     }
147     else {
148       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
149     }
150   }
151   return 0;
152 }
153 
154 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
155 {
156   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
157   MPI_Comm    comm = mat->comm;
158   int         size = aij->size, *owners = aij->rowners;
159   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
160   MPI_Request *send_waits,*recv_waits;
161   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
162   InsertMode  addv;
163   Scalar      *rvalues,*svalues;
164 
165   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
166   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
167   if (addv == (ADD_VALUES|INSERT_VALUES)) {
168     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
169   }
170   aij->insertmode = addv; /* in case this processor had no cache */
171 
172   /*  first count number of contributors to each processor */
173   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
174   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
175   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
176   for ( i=0; i<aij->stash.n; i++ ) {
177     idx = aij->stash.idx[i];
178     for ( j=0; j<size; j++ ) {
179       if (idx >= owners[j] && idx < owners[j+1]) {
180         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
181       }
182     }
183   }
184   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
185 
186   /* inform other processors of the number of messages and the max length */
187   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
188   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
189   nreceives = work[rank];
190   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
191   nmax = work[rank];
192   PetscFree(work);
193 
194   /* post receives:
195        1) each message will consist of ordered triples
196      (global row index, global column index, value); we store the indices
197      as Scalars (doubles) to simplify the message passing.
198        2) since we don't know how long each individual message is, we
199      allocate the largest needed buffer for each receive.  Potentially
200      this is a lot of wasted space.
201 
202 
203        This could be done better.
204   */
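  /*
     Buffer layout sketch (each stashed entry becomes a triple of Scalars):

        svalues = [ row_0, col_0, val_0, row_1, col_1, val_1, ... ]

     so a message carrying k entries occupies 3*k Scalars, which is why the
     receive buffers below are allocated with room for 3*nmax Scalars each.
  */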
205   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
206   CHKPTRQ(rvalues);
207   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
208   CHKPTRQ(recv_waits);
209   for ( i=0; i<nreceives; i++ ) {
210     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
211               comm,recv_waits+i);
212   }
213 
214   /* do sends:
215       1) starts[i] gives the starting index in svalues for stuff going to
216          the ith processor
217   */
218   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
219   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
220   CHKPTRQ(send_waits);
221   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
222   starts[0] = 0;
223   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
224   for ( i=0; i<aij->stash.n; i++ ) {
225     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
226     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
227     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
228   }
229   PetscFree(owner);
230   starts[0] = 0;
231   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
232   count = 0;
233   for ( i=0; i<size; i++ ) {
234     if (procs[i]) {
235       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
236                 comm,send_waits+count++);
237     }
238   }
239   PetscFree(starts); PetscFree(nprocs);
240 
241   /* Free cache space */
242   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
243   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
244 
245   aij->svalues    = svalues;    aij->rvalues    = rvalues;
246   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
247   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
248   aij->rmax       = nmax;
249 
250   return 0;
251 }
252 extern int MatSetUpMultiply_MPIAIJ(Mat);
253 
254 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
255 {
256   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
257   MPI_Status  *send_status,recv_status;
258   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
259   int         row,col,other_disassembled;
260   Scalar      *values,val;
261   InsertMode  addv = aij->insertmode;
262 
263   /*  wait on receives */
264   while (count) {
265     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
266     /* unpack receives into our local space */
267     values = aij->rvalues + 3*imdex*aij->rmax;
268     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
269     n = n/3;
270     for ( i=0; i<n; i++ ) {
271       row = (int) PetscReal(values[3*i]) - aij->rstart;
272       col = (int) PetscReal(values[3*i+1]);
273       val = values[3*i+2];
274       if (col >= aij->cstart && col < aij->cend) {
275         col -= aij->cstart;
276         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
277       }
278       else {
279         if (mat->was_assembled || mat->assembled) {
280           if (!aij->colmap) {
281             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
282           }
283           col = aij->colmap[col] - 1;
284           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
285             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
286             col = (int) PetscReal(values[3*i+1]);
287           }
288         }
289         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
290       }
291     }
292     count--;
293   }
294   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
295 
296   /* wait on sends */
297   if (aij->nsends) {
298     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
299     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
300     PetscFree(send_status);
301   }
302   PetscFree(aij->send_waits); PetscFree(aij->svalues);
303 
304   aij->insertmode = NOT_SET_VALUES;
305   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
306   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
307 
308   /* determine if any processor has disassembled; if so, we must
309      also disassemble ourselves so that all processors can reassemble. */
310   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
311   if (mat->was_assembled && !other_disassembled) {
312     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
313   }
314 
315   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
316     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
317   }
318   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
319   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
320 
321   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
322   return 0;
323 }
324 
325 static int MatZeroEntries_MPIAIJ(Mat A)
326 {
327   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
328   int        ierr;
329   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
330   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
331   return 0;
332 }
333 
334 /* the code does not do the diagonal entries correctly unless the
335    matrix is square and the row and column ownerships are identical.
336    This is a BUG. The only way to fix it seems to be to access
337    aij->A and aij->B directly and not through the MatZeroRows()
338    routine.
339 */
340 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
341 {
342   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
343   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
344   int            *procs,*nprocs,j,found,idx,nsends,*work;
345   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
346   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
347   int            *lens,imdex,*lrows,*values;
348   MPI_Comm       comm = A->comm;
349   MPI_Request    *send_waits,*recv_waits;
350   MPI_Status     recv_status,*send_status;
351   IS             istmp;
352 
353   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
354   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
355 
356   /*  first count number of contributors to each processor */
357   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
358   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
359   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
360   for ( i=0; i<N; i++ ) {
361     idx = rows[i];
362     found = 0;
363     for ( j=0; j<size; j++ ) {
364       if (idx >= owners[j] && idx < owners[j+1]) {
365         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
366       }
367     }
368     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
369   }
370   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
371 
372   /* inform other processors of the number of messages and the max length */
373   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
374   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
375   nrecvs = work[rank];
376   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
377   nmax = work[rank];
378   PetscFree(work);
379 
380   /* post receives:   */
381   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
382   CHKPTRQ(rvalues);
383   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
384   CHKPTRQ(recv_waits);
385   for ( i=0; i<nrecvs; i++ ) {
386     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
387   }
388 
389   /* do sends:
390       1) starts[i] gives the starting index in svalues for stuff going to
391          the ith processor
392   */
393   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
394   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
395   CHKPTRQ(send_waits);
396   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
397   starts[0] = 0;
398   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
399   for ( i=0; i<N; i++ ) {
400     svalues[starts[owner[i]]++] = rows[i];
401   }
402   ISRestoreIndices(is,&rows);
403 
404   starts[0] = 0;
405   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
406   count = 0;
407   for ( i=0; i<size; i++ ) {
408     if (procs[i]) {
409       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
410     }
411   }
412   PetscFree(starts);
413 
414   base = owners[rank];
415 
416   /*  wait on receives */
417   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
418   source = lens + nrecvs;
419   count  = nrecvs; slen = 0;
420   while (count) {
421     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
422     /* unpack receives into our local space */
423     MPI_Get_count(&recv_status,MPI_INT,&n);
424     source[imdex]  = recv_status.MPI_SOURCE;
425     lens[imdex]  = n;
426     slen += n;
427     count--;
428   }
429   PetscFree(recv_waits);
430 
431   /* move the received row indices into a single local list */
432   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
433   count = 0;
434   for ( i=0; i<nrecvs; i++ ) {
435     values = rvalues + i*nmax;
436     for ( j=0; j<lens[i]; j++ ) {
437       lrows[count++] = values[j] - base;
438     }
439   }
440   PetscFree(rvalues); PetscFree(lens);
441   PetscFree(owner); PetscFree(nprocs);
442 
443   /* actually zap the local rows */
444   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
445   PLogObjectParent(A,istmp);
446   PetscFree(lrows);
447   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
448   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
449   ierr = ISDestroy(istmp); CHKERRQ(ierr);
450 
451   /* wait on sends */
452   if (nsends) {
453     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
454     CHKPTRQ(send_status);
455     MPI_Waitall(nsends,send_waits,send_status);
456     PetscFree(send_status);
457   }
458   PetscFree(send_waits); PetscFree(svalues);
459 
460   return 0;
461 }
462 
463 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
464 {
465   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466   int        ierr,nt;
467 
468   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
469   if (nt != a->n) {
470     SETERRQ(1,"MatMult_MPIAIJ:Incompatible parition of A and xx");
471   }
472   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
473   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
474   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
475   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
476   return 0;
477 }
478 
479 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
480 {
481   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
482   int        ierr;
483   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
484   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
485   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
486   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
487   return 0;
488 }
489 
490 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
491 {
492   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
493   int        ierr;
494 
495   /* do nondiagonal part */
496   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
497   /* send it on its way */
498   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
499   /* do local part */
500   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
501   /* receive remote parts: note this assumes the values are not actually */
502   /* added to yy until the VecScatterEnd() below, which is true for the   */
503   /* current implementation but may not always be the case. */
504   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
505   return 0;
506 }
507 
508 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
509 {
510   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
511   int        ierr;
512 
513   /* do nondiagonal part */
514   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
515   /* send it on its way */
516   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
517   /* do local part */
518   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
519   /* receive remote parts: note this assumes the values are not actually */
520   /* added to zz until the VecScatterEnd() below, which is true for the   */
521   /* current implementation but may not always be the case. */
522   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
523   return 0;
524 }
525 
526 /*
527   This only works correctly for square matrices where the subblock A->A is the
528    diagonal block
529 */
530 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
531 {
532   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
533   if (a->M != a->N)
534     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
535   if (a->rstart != a->cstart || a->rend != a->cend) {
536     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
537   return MatGetDiagonal(a->A,v);
538 }
539 
540 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
541 {
542   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
543   int        ierr;
544   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
545   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
546   return 0;
547 }
548 
549 static int MatDestroy_MPIAIJ(PetscObject obj)
550 {
551   Mat        mat = (Mat) obj;
552   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
553   int        ierr;
554 #if defined(PETSC_LOG)
555   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
556 #endif
557   PetscFree(aij->rowners);
558   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
559   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
560   if (aij->colmap) PetscFree(aij->colmap);
561   if (aij->garray) PetscFree(aij->garray);
562   if (aij->lvec)   VecDestroy(aij->lvec);
563   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
564   if (aij->rowvalues) PetscFree(aij->rowvalues);
565   PetscFree(aij);
566   PLogObjectDestroy(mat);
567   PetscHeaderDestroy(mat);
568   return 0;
569 }
570 #include "draw.h"
571 #include "pinclude/pviewer.h"
572 
573 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
574 {
575   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
576   int         ierr;
577 
578   if (aij->size == 1) {
579     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
580   }
581   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
582   return 0;
583 }
584 
585 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
586 {
587   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
588   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
589   int         ierr, format,shift = C->indexshift,rank;
590   FILE        *fd;
591   ViewerType  vtype;
592 
593   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
594   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
595     ierr = ViewerGetFormat(viewer,&format);
596     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
597       MatInfo info;
598       int     flg;
599       MPI_Comm_rank(mat->comm,&rank);
600       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
601       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
602       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
603       PetscSequentialPhaseBegin(mat->comm,1);
604       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
605          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
606       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
607          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
608       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
609       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
610       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
611       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
612       fflush(fd);
613       PetscSequentialPhaseEnd(mat->comm,1);
614       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
615       return 0;
616     }
617     else if (format == VIEWER_FORMAT_ASCII_INFO) {
618       return 0;
619     }
620   }
621 
622   if (vtype == DRAW_VIEWER) {
623     Draw       draw;
624     PetscTruth isnull;
625     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
626     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
627   }
628 
629   if (vtype == ASCII_FILE_VIEWER) {
630     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
631     PetscSequentialPhaseBegin(mat->comm,1);
632     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
633            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
634            aij->cend);
635     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
636     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
637     fflush(fd);
638     PetscSequentialPhaseEnd(mat->comm,1);
639   }
640   else {
641     int size = aij->size;
642     rank = aij->rank;
643     if (size == 1) {
644       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
645     }
646     else {
647       /* assemble the entire matrix onto first processor. */
648       Mat         A;
649       Mat_SeqAIJ *Aloc;
650       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
651       Scalar      *a;
652 
653       if (!rank) {
654         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
655                CHKERRQ(ierr);
656       }
657       else {
658         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
659                CHKERRQ(ierr);
660       }
661       PLogObjectParent(mat,A);
662 
663       /* copy over the A part */
664       Aloc = (Mat_SeqAIJ*) aij->A->data;
665       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
666       row = aij->rstart;
667       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
668       for ( i=0; i<m; i++ ) {
669         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
670         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
671       }
672       aj = Aloc->j;
673       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
674 
675       /* copy over the B part */
676       Aloc = (Mat_SeqAIJ*) aij->B->data;
677       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
678       row = aij->rstart;
679       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
680       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
681       for ( i=0; i<m; i++ ) {
682         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
683         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
684       }
685       PetscFree(ct);
686       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
687       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
688       if (!rank) {
689         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
690       }
691       ierr = MatDestroy(A); CHKERRQ(ierr);
692     }
693   }
694   return 0;
695 }
696 
697 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
698 {
699   Mat         mat = (Mat) obj;
700   int         ierr;
701   ViewerType  vtype;
702 
703   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
704   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
705       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
706     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
707   }
708   else if (vtype == BINARY_FILE_VIEWER) {
709     return MatView_MPIAIJ_Binary(mat,viewer);
710   }
711   return 0;
712 }
713 
714 /*
715     This has to provide several versions.
716 
717      2) a) use only local smoothing, updating outer values only once.
718         b) local smoothing, updating outer values each inner iteration.
719      3) coloring, updating outer values between colors.
720 */
721 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
722                            double fshift,int its,Vec xx)
723 {
724   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
725   Mat        AA = mat->A, BB = mat->B;
726   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
727   Scalar     *b,*x,*xs,*ls,d,*v,sum;
728   int        ierr,*idx, *diag;
729   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
730 
731   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
732   xs = x + shift; /* shift by one for index start of 1 */
733   ls = ls + shift;
734   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
735   diag = A->diag;
736   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
737     if (flag & SOR_ZERO_INITIAL_GUESS) {
738       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
739     }
740     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
741     CHKERRQ(ierr);
742     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
743     CHKERRQ(ierr);
744     while (its--) {
745       /* go down through the rows */
746       for ( i=0; i<m; i++ ) {
747         n    = A->i[i+1] - A->i[i];
748         idx  = A->j + A->i[i] + shift;
749         v    = A->a + A->i[i] + shift;
750         sum  = b[i];
751         SPARSEDENSEMDOT(sum,xs,v,idx,n);
752         d    = fshift + A->a[diag[i]+shift];
753         n    = B->i[i+1] - B->i[i];
754         idx  = B->j + B->i[i] + shift;
755         v    = B->a + B->i[i] + shift;
756         SPARSEDENSEMDOT(sum,ls,v,idx,n);
757         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
758       }
759       /* come up through the rows */
760       for ( i=m-1; i>-1; i-- ) {
761         n    = A->i[i+1] - A->i[i];
762         idx  = A->j + A->i[i] + shift;
763         v    = A->a + A->i[i] + shift;
764         sum  = b[i];
765         SPARSEDENSEMDOT(sum,xs,v,idx,n);
766         d    = fshift + A->a[diag[i]+shift];
767         n    = B->i[i+1] - B->i[i];
768         idx  = B->j + B->i[i] + shift;
769         v    = B->a + B->i[i] + shift;
770         SPARSEDENSEMDOT(sum,ls,v,idx,n);
771         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
772       }
773     }
774   }
775   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
776     if (flag & SOR_ZERO_INITIAL_GUESS) {
777       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
778     }
779     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
780     CHKERRQ(ierr);
781     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
782     CHKERRQ(ierr);
783     while (its--) {
784       for ( i=0; i<m; i++ ) {
785         n    = A->i[i+1] - A->i[i];
786         idx  = A->j + A->i[i] + shift;
787         v    = A->a + A->i[i] + shift;
788         sum  = b[i];
789         SPARSEDENSEMDOT(sum,xs,v,idx,n);
790         d    = fshift + A->a[diag[i]+shift];
791         n    = B->i[i+1] - B->i[i];
792         idx  = B->j + B->i[i] + shift;
793         v    = B->a + B->i[i] + shift;
794         SPARSEDENSEMDOT(sum,ls,v,idx,n);
795         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
796       }
797     }
798   }
799   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
800     if (flag & SOR_ZERO_INITIAL_GUESS) {
801       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
802     }
803     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
804                             mat->Mvctx); CHKERRQ(ierr);
805     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
806                             mat->Mvctx); CHKERRQ(ierr);
807     while (its--) {
808       for ( i=m-1; i>-1; i-- ) {
809         n    = A->i[i+1] - A->i[i];
810         idx  = A->j + A->i[i] + shift;
811         v    = A->a + A->i[i] + shift;
812         sum  = b[i];
813         SPARSEDENSEMDOT(sum,xs,v,idx,n);
814         d    = fshift + A->a[diag[i]+shift];
815         n    = B->i[i+1] - B->i[i];
816         idx  = B->j + B->i[i] + shift;
817         v    = B->a + B->i[i] + shift;
818         SPARSEDENSEMDOT(sum,ls,v,idx,n);
819         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
820       }
821     }
822   }
823   else {
824     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
825   }
826   return 0;
827 }
828 
829 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
830 {
831   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
832   Mat        A = mat->A, B = mat->B;
833   int        ierr;
834   double     isend[5], irecv[5];
835 
836   info->rows_global    = (double)mat->M;
837   info->columns_global = (double)mat->N;
838   info->rows_local     = (double)mat->m;
839   info->columns_local  = (double)mat->N;
840   info->block_size     = 1.0;
841   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
842   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
843   isend[3] = info->memory;  isend[4] = info->mallocs;
844   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
845   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
846   isend[3] += info->memory;  isend[4] += info->mallocs;
847   if (flag == MAT_LOCAL) {
848     info->nz_used      = isend[0];
849     info->nz_allocated = isend[1];
850     info->nz_unneeded  = isend[2];
851     info->memory       = isend[3];
852     info->mallocs      = isend[4];
853   } else if (flag == MAT_GLOBAL_MAX) {
854     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
855     info->nz_used      = irecv[0];
856     info->nz_allocated = irecv[1];
857     info->nz_unneeded  = irecv[2];
858     info->memory       = irecv[3];
859     info->mallocs      = irecv[4];
860   } else if (flag == MAT_GLOBAL_SUM) {
861     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
862     info->nz_used      = irecv[0];
863     info->nz_allocated = irecv[1];
864     info->nz_unneeded  = irecv[2];
865     info->memory       = irecv[3];
866     info->mallocs      = irecv[4];
867   }
868   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
869   info->fill_ratio_needed = 0;
870   info->factor_mallocs    = 0;
871 
872   return 0;
873 }
874 
875 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
876 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
877 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
878 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
879 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
880 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
881 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
882 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
883 
884 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
885 {
886   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
887 
888   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
889       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
890       op == MAT_COLUMNS_SORTED ||
891       op == MAT_ROW_ORIENTED) {
892         MatSetOption(a->A,op);
893         MatSetOption(a->B,op);
894   }
895   else if (op == MAT_ROWS_SORTED ||
896            op == MAT_SYMMETRIC ||
897            op == MAT_STRUCTURALLY_SYMMETRIC ||
898            op == MAT_YES_NEW_DIAGONALS)
899     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
900   else if (op == MAT_COLUMN_ORIENTED) {
901     a->roworiented = 0;
902     MatSetOption(a->A,op);
903     MatSetOption(a->B,op);
904   }
905   else if (op == MAT_NO_NEW_DIAGONALS)
906     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
907   else
908     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
909   return 0;
910 }
911 
912 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
913 {
914   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
915   *m = mat->M; *n = mat->N;
916   return 0;
917 }
918 
919 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
920 {
921   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
922   *m = mat->m; *n = mat->N;
923   return 0;
924 }
925 
926 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
927 {
928   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
929   *m = mat->rstart; *n = mat->rend;
930   return 0;
931 }
932 
933 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
934 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
935 
936 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
937 {
938   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
939   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
940   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
941   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
942   int        *cmap, *idx_p;
943 
944   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
945   mat->getrowactive = PETSC_TRUE;
946 
947   if (!mat->rowvalues && (idx || v)) {
948     /*
949         allocate enough space to hold information from the longest row.
950     */
951     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
952     int     max = 1,m = mat->m,tmp;
953     for ( i=0; i<m; i++ ) {
954       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
955       if (max < tmp) { max = tmp; }
956     }
957     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
958     CHKPTRQ(mat->rowvalues);
959     mat->rowindices = (int *) (mat->rowvalues + max);
960   }
961 
962   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
963   lrow = row - rstart;
964 
965   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
966   if (!v)   {pvA = 0; pvB = 0;}
967   if (!idx) {pcA = 0; if (!v) pcB = 0;}
968   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
969   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
970   nztot = nzA + nzB;
971 
972   cmap  = mat->garray;
973   if (v  || idx) {
974     if (nztot) {
975       /* Sort by increasing column numbers, assuming A and B already sorted */
976       int imark = -1;
977       if (v) {
978         *v = v_p = mat->rowvalues;
979         for ( i=0; i<nzB; i++ ) {
980           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
981           else break;
982         }
983         imark = i;
984         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
985         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
986       }
987       if (idx) {
988         *idx = idx_p = mat->rowindices;
989         if (imark > -1) {
990           for ( i=0; i<imark; i++ ) {
991             idx_p[i] = cmap[cworkB[i]];
992           }
993         } else {
994           for ( i=0; i<nzB; i++ ) {
995             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
996             else break;
997           }
998           imark = i;
999         }
1000         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1001         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1002       }
1003     }
1004     else {
1005       if (idx) *idx = 0;
1006       if (v)   *v   = 0;
1007     }
1008   }
1009   *nz = nztot;
1010   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1011   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1012   return 0;
1013 }
1014 
1015 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1016 {
1017   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1018   if (aij->getrowactive == PETSC_FALSE) {
1019     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
1020   }
1021   aij->getrowactive = PETSC_FALSE;
1022   return 0;
1023 }
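/*
    Typical calling sketch for the two routines above (no error checking shown;
    only locally owned rows may be requested, and each MatGetRow() must be paired
    with a MatRestoreRow() before the next row is fetched):

       int    rstart,rend,row,nz,*cols;
       Scalar *vals;
       MatGetOwnershipRange(mat,&rstart,&rend);
       for ( row=rstart; row<rend; row++ ) {
         MatGetRow(mat,row,&nz,&cols,&vals);
         ....  use the nz entries of cols[] and vals[]  ....
         MatRestoreRow(mat,row,&nz,&cols,&vals);
       }
*/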
1024 
1025 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1026 {
1027   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1028   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1029   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1030   double     sum = 0.0;
1031   Scalar     *v;
1032 
1033   if (aij->size == 1) {
1034     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1035   } else {
1036     if (type == NORM_FROBENIUS) {
1037       v = amat->a;
1038       for (i=0; i<amat->nz; i++ ) {
1039 #if defined(PETSC_COMPLEX)
1040         sum += real(conj(*v)*(*v)); v++;
1041 #else
1042         sum += (*v)*(*v); v++;
1043 #endif
1044       }
1045       v = bmat->a;
1046       for (i=0; i<bmat->nz; i++ ) {
1047 #if defined(PETSC_COMPLEX)
1048         sum += real(conj(*v)*(*v)); v++;
1049 #else
1050         sum += (*v)*(*v); v++;
1051 #endif
1052       }
1053       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1054       *norm = sqrt(*norm);
1055     }
1056     else if (type == NORM_1) { /* max column norm */
1057       double *tmp, *tmp2;
1058       int    *jj, *garray = aij->garray;
1059       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1060       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1061       PetscMemzero(tmp,aij->N*sizeof(double));
1062       *norm = 0.0;
1063       v = amat->a; jj = amat->j;
1064       for ( j=0; j<amat->nz; j++ ) {
1065         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1066       }
1067       v = bmat->a; jj = bmat->j;
1068       for ( j=0; j<bmat->nz; j++ ) {
1069         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1070       }
1071       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1072       for ( j=0; j<aij->N; j++ ) {
1073         if (tmp2[j] > *norm) *norm = tmp2[j];
1074       }
1075       PetscFree(tmp); PetscFree(tmp2);
1076     }
1077     else if (type == NORM_INFINITY) { /* max row norm */
1078       double ntemp = 0.0;
1079       for ( j=0; j<amat->m; j++ ) {
1080         v = amat->a + amat->i[j] + shift;
1081         sum = 0.0;
1082         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1083           sum += PetscAbsScalar(*v); v++;
1084         }
1085         v = bmat->a + bmat->i[j] + shift;
1086         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1087           sum += PetscAbsScalar(*v); v++;
1088         }
1089         if (sum > ntemp) ntemp = sum;
1090       }
1091       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1092     }
1093     else {
1094       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1095     }
1096   }
1097   return 0;
1098 }
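/*
    For reference, the norms computed above are taken over the global matrix:
      NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )
      NORM_1:          max_j sum_i |a_ij|   (largest column sum)
      NORM_INFINITY:   max_i sum_j |a_ij|   (largest row sum)
*/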
1099 
1100 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1101 {
1102   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1103   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1104   int        ierr,shift = Aloc->indexshift;
1105   Mat        B;
1106   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1107   Scalar     *array;
1108 
1109   if (matout == PETSC_NULL && M != N)
1110     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1111   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1112          PETSC_NULL,&B); CHKERRQ(ierr);
1113 
1114   /* copy over the A part */
1115   Aloc = (Mat_SeqAIJ*) a->A->data;
1116   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1117   row = a->rstart;
1118   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1119   for ( i=0; i<m; i++ ) {
1120     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1121     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1122   }
1123   aj = Aloc->j;
1124   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1125 
1126   /* copy over the B part */
1127   Aloc = (Mat_SeqAIJ*) a->B->data;
1128   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1129   row = a->rstart;
1130   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1131   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1132   for ( i=0; i<m; i++ ) {
1133     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1134     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1135   }
1136   PetscFree(ct);
1137   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1138   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1139   if (matout != PETSC_NULL) {
1140     *matout = B;
1141   } else {
1142     /* This isn't really an in-place transpose, but we free a's data structures and overwrite the header of A with that of B */
1143     PetscFree(a->rowners);
1144     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1145     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1146     if (a->colmap) PetscFree(a->colmap);
1147     if (a->garray) PetscFree(a->garray);
1148     if (a->lvec) VecDestroy(a->lvec);
1149     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1150     PetscFree(a);
1151     PetscMemcpy(A,B,sizeof(struct _Mat));
1152     PetscHeaderDestroy(B);
1153   }
1154   return 0;
1155 }
1156 
1157 int MatDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr)
1158 {
1159   Mat a = ((Mat_MPIAIJ *) A->data)->A;
1160   Mat b = ((Mat_MPIAIJ *) A->data)->B;
1161   int ierr,s1,s2,s3;
1162 
1163   if (ll)  {
1164     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1165     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1166     if (s1!=s2) SETERRQ(1,"MatDiagonalScale_MPIAIJ:non-conforming local sizes");
1167     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1168     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1169   }
1170   if (rr) SETERRQ(1,"MatDiagonalScale_MPIAIJ:not supported for right vector");
1171   return 0;
1172 }
1173 
1174 
1175 extern int MatPrintHelp_SeqAIJ(Mat);
1176 static int MatPrintHelp_MPIAIJ(Mat A)
1177 {
1178   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1179 
1180   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1181   else return 0;
1182 }
1183 
1184 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1185 {
1186   *bs = 1;
1187   return 0;
1188 }
1189 
1190 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1191 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1192 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1193 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1194 extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1195 /* -------------------------------------------------------------------*/
1196 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1197        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1198        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1199        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1200        0,0,
1201        0,0,
1202        0,0,
1203        MatRelax_MPIAIJ,
1204        MatTranspose_MPIAIJ,
1205        MatGetInfo_MPIAIJ,0,
1206        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1207        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1208        0,
1209        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1210        0,0,0,0,
1211        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1212        0,0,
1213        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1214        0,0,0,
1215        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1216        MatPrintHelp_MPIAIJ,
1217        MatScale_MPIAIJ,0,0,0,
1218        MatGetBlockSize_MPIAIJ,0,0,0,0,
1219        MatFDColoringCreate_MPIAIJ};
1220 
1221 
1222 /*@C
1223    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1224    (the default parallel PETSc format).  For good matrix assembly performance
1225    the user should preallocate the matrix storage by setting the parameters
1226    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1227    performance can be increased by more than a factor of 50.
1228 
1229    Input Parameters:
1230 .  comm - MPI communicator
1231 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1232            This value should be the same as the local size used in creating the
1233            y vector for the matrix-vector product y = Ax.
1234 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1235            This value should be the same as the local size used in creating the
1236            x vector for the matrix-vector product y = Ax.
1237 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1238 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1239 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1240            (same for all local rows)
1241 .  d_nnz - array containing the number of nonzeros in the various rows of the
1242            diagonal portion of the local submatrix (possibly different for each row)
1243            or PETSC_NULL. You must leave room for the diagonal entry even if
1244            it is zero.
1245 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1246            submatrix (same for all local rows).
1247 .  o_nnz - array containing the number of nonzeros in the various rows of the
1248            off-diagonal portion of the local submatrix (possibly different for
1249            each row) or PETSC_NULL.
1250 
1251    Output Parameter:
1252 .  A - the matrix
1253 
1254    Notes:
1255    The AIJ format (also called the Yale sparse matrix format or
1256    compressed row storage), is fully compatible with standard Fortran 77
1257    storage.  That is, the stored row and column indices can begin at
1258    either one (as in Fortran) or zero.  See the users manual for details.
1259 
1260    The user MUST specify either the local or global matrix dimensions
1261    (possibly both).
1262 
1263    By default, this format uses inodes (identical nodes) when possible.
1264    We search for consecutive rows with the same nonzero structure, thereby
1265    reusing matrix information to achieve increased efficiency.
1266 
1267    Options Database Keys:
1268 $    -mat_aij_no_inode  - Do not use inodes
1269 $    -mat_aij_inode_limit <limit> - Set inode limit.
1270 $        (max limit=5)
1271 $    -mat_aij_oneindex - Internally use indexing starting at 1
1272 $        rather than 0.  Note: When calling MatSetValues(),
1273 $        the user still MUST index entries starting at 0!
1274 
1275    Storage Information:
1276    For a square global matrix we define each processor's diagonal portion
1277    to be its local rows and the corresponding columns (a square submatrix);
1278    each processor's off-diagonal portion encompasses the remainder of the
1279    local matrix (a rectangular submatrix).
1280 
1281    The user can specify preallocated storage for the diagonal part of
1282    the local submatrix with either d_nz or d_nnz (not both).  Set
1283    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1284    memory allocation.  Likewise, specify preallocated storage for the
1285    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1286 
1287    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1288    the figure below we depict these three local rows and all columns (0-11).
1289 
1290 $          0 1 2 3 4 5 6 7 8 9 10 11
1291 $         -------------------
1292 $  row 3  |  o o o d d d o o o o o o
1293 $  row 4  |  o o o d d d o o o o o o
1294 $  row 5  |  o o o d d d o o o o o o
1295 $         -------------------
1296 $
1297 
1298    Thus, any entries in the d locations are stored in the d (diagonal)
1299    submatrix, and any entries in the o locations are stored in the
1300    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1301    stored simply in the MATSEQAIJ format for compressed row storage.
1302 
1303    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1304    and o_nz should indicate the number of nonzeros per row in the o matrix.
1305    In general, for PDE problems in which most nonzeros are near the diagonal,
1306    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1307    or you will get TERRIBLE performance; see the users' manual chapter on
1308    matrices and the file $(PETSC_DIR)/Performance.
1309 
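   Example:
   A rough calling sketch, where M and N are the global dimensions and the
   per-row nonzero estimates (5 for the diagonal block, 2 for the off-diagonal
   block) are illustrative only:
$      Mat A;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
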
1310 .keywords: matrix, aij, compressed row, sparse, parallel
1311 
1312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1313 @*/
1314 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1315                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1316 {
1317   Mat          B;
1318   Mat_MPIAIJ   *b;
1319   int          ierr, i,sum[2],work[2],size;
1320 
1321   *A = 0;
1322   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1323   PLogObjectCreate(B);
1324   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1325   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1326   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1327   MPI_Comm_size(comm,&size);
1328   if (size == 1) {
1329     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1330     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1331     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1332     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1333     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1334     B->ops.solve             = MatSolve_MPIAIJ;
1335     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1336     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1337     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1338     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1339   }
1340   B->destroy    = MatDestroy_MPIAIJ;
1341   B->view       = MatView_MPIAIJ;
1342   B->factor     = 0;
1343   B->assembled  = PETSC_FALSE;
1344 
1345   b->insertmode = NOT_SET_VALUES;
1346   MPI_Comm_rank(comm,&b->rank);
1347   MPI_Comm_size(comm,&b->size);
1348 
1349   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1350     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1351 
1352   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1353     work[0] = m; work[1] = n;
1354     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1355     if (M == PETSC_DECIDE) M = sum[0];
1356     if (N == PETSC_DECIDE) N = sum[1];
1357   }
1358   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1359   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
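  /*
     Note: this default split gives the first (M % size) processors one extra row;
     for example (illustrative), M = 10 on 3 processors yields local sizes 4, 3, 3.
  */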
1360   b->m = m; B->m = m;
1361   b->n = n; B->n = n;
1362   b->N = N; B->N = N;
1363   b->M = M; B->M = M;
1364 
1365   /* build local table of row and column ownerships */
1366   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1367   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1368   b->cowners = b->rowners + b->size + 2;
1369   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1370   b->rowners[0] = 0;
1371   for ( i=2; i<=b->size; i++ ) {
1372     b->rowners[i] += b->rowners[i-1];
1373   }
1374   b->rstart = b->rowners[b->rank];
1375   b->rend   = b->rowners[b->rank+1];
1376   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1377   b->cowners[0] = 0;
1378   for ( i=2; i<=b->size; i++ ) {
1379     b->cowners[i] += b->cowners[i-1];
1380   }
1381   b->cstart = b->cowners[b->rank];
1382   b->cend   = b->cowners[b->rank+1];
1383 
1384   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1385   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1386   PLogObjectParent(B,b->A);
1387   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1388   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1389   PLogObjectParent(B,b->B);
1390 
1391   /* build cache for off array entries formed */
1392   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1393   b->colmap      = 0;
1394   b->garray      = 0;
1395   b->roworiented = 1;
1396 
1397   /* stuff used for matrix vector multiply */
1398   b->lvec      = 0;
1399   b->Mvctx     = 0;
1400 
1401   /* stuff for MatGetRow() */
1402   b->rowindices   = 0;
1403   b->rowvalues    = 0;
1404   b->getrowactive = PETSC_FALSE;
1405 
1406   *A = B;
1407   return 0;
1408 }
1409 
1410 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1411 {
1412   Mat        mat;
1413   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1414   int        ierr, len=0, flg;
1415 
1416   *newmat       = 0;
1417   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1418   PLogObjectCreate(mat);
1419   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1420   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1421   mat->destroy    = MatDestroy_MPIAIJ;
1422   mat->view       = MatView_MPIAIJ;
1423   mat->factor     = matin->factor;
1424   mat->assembled  = PETSC_TRUE;
1425 
1426   a->m = mat->m   = oldmat->m;
1427   a->n = mat->n   = oldmat->n;
1428   a->M = mat->M   = oldmat->M;
1429   a->N = mat->N   = oldmat->N;
1430 
1431   a->rstart       = oldmat->rstart;
1432   a->rend         = oldmat->rend;
1433   a->cstart       = oldmat->cstart;
1434   a->cend         = oldmat->cend;
1435   a->size         = oldmat->size;
1436   a->rank         = oldmat->rank;
1437   a->insertmode   = NOT_SET_VALUES;
1438   a->rowvalues    = 0;
1439   a->getrowactive = PETSC_FALSE;
1440 
1441   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1442   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1443   a->cowners = a->rowners + a->size + 2;
1444   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1445   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1446   if (oldmat->colmap) {
1447     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1448     PLogObjectMemory(mat,(a->N)*sizeof(int));
1449     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1450   } else a->colmap = 0;
1451   if (oldmat->garray) {
1452     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1453     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1454     PLogObjectMemory(mat,len*sizeof(int));
1455     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1456   } else a->garray = 0;
1457 
1458   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1459   PLogObjectParent(mat,a->lvec);
1460   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1461   PLogObjectParent(mat,a->Mvctx);
1462   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1463   PLogObjectParent(mat,a->A);
1464   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1465   PLogObjectParent(mat,a->B);
1466   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1467   if (flg) {
1468     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1469   }
1470   *newmat = mat;
1471   return 0;
1472 }
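/*
   MatConvertSameType_MPIAIJ() is normally reached through MatConvert() when
   the requested type matches the existing one, mirroring the MATSAME calls
   used above for the sequential blocks; a sketch (Bcopy is a hypothetical
   name):

       Mat Bcopy;
       ierr = MatConvert(A,MATSAME,&Bcopy); CHKERRQ(ierr);

   The source matrix must already be assembled, since its lvec and Mvctx are
   duplicated directly.
*/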
1473 
1474 #include "sys.h"
1475 
1476 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1477 {
1478   Mat          A;
1479   int          i, nz, ierr, j,rstart, rend, fd;
1480   Scalar       *vals,*svals;
1481   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1482   MPI_Status   status;
1483   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1484   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1485   int          tag = ((PetscObject)viewer)->tag;
1486 
1487   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1488   if (!rank) {
1489     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1490     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1491     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1492   }
1493 
1494   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1495   M = header[1]; N = header[2];
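  /* The binary file is assumed to follow the standard PETSc matrix format:
        int    header[4] = {MAT_COOKIE, M, N, total number of nonzeros}
        int    rowlengths[M]      (nonzeros in each row)
        int    column indices     (all rows, concatenated)
        Scalar numerical values   (all rows, concatenated)
     Only header[0..2] are consulted here; header[3] is not used. */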
1496   /* determine ownership of all rows */
1497   m = M/size + ((M % size) > rank);
1498   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1499   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1500   rowners[0] = 0;
1501   for ( i=2; i<=size; i++ ) {
1502     rowners[i] += rowners[i-1];
1503   }
1504   rstart = rowners[rank];
1505   rend   = rowners[rank+1];
1506 
1507   /* distribute row lengths to all processors */
1508   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1509   offlens = ourlens + (rend-rstart);
1510   if (!rank) {
1511     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1512     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1513     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1514     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1515     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1516     PetscFree(sndcounts);
1517   }
1518   else {
1519     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1520   }
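  /* The scatter above reuses rowners[] as the displacement array: rowners[p]
     is both the first row owned by process p and the offset of that process's
     row lengths in rowlengths[], while sndcounts[p] = rowners[p+1]-rowners[p]
     rows are sent to it. */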
1521 
1522   if (!rank) {
1523     /* calculate the number of nonzeros on each processor */
1524     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1525     PetscMemzero(procsnz,size*sizeof(int));
1526     for ( i=0; i<size; i++ ) {
1527       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1528         procsnz[i] += rowlengths[j];
1529       }
1530     }
1531     PetscFree(rowlengths);
1532 
1533     /* determine max buffer needed and allocate it */
1534     maxnz = 0;
1535     for ( i=0; i<size; i++ ) {
1536       maxnz = PetscMax(maxnz,procsnz[i]);
1537     }
1538     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1539 
1540     /* read in my part of the matrix column indices  */
1541     nz = procsnz[0];
1542     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1543     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1544 
1545     /* read in everyone else's column indices and ship them off */
1546     for ( i=1; i<size; i++ ) {
1547       nz = procsnz[i];
1548       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1549       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1550     }
1551     PetscFree(cols);
1552   }
1553   else {
1554     /* determine buffer space needed for message */
1555     nz = 0;
1556     for ( i=0; i<m; i++ ) {
1557       nz += ourlens[i];
1558     }
1559     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1560 
1561     /* receive message of column indices */
1562     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1563     MPI_Get_count(&status,MPI_INT,&maxnz);
1564     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:number of column indices received does not match the row lengths");
1565   }
1566 
1567   /* loop over local rows, determining the number of off-diagonal entries in each */
1568   PetscMemzero(offlens,m*sizeof(int));
1569   jj = 0;
1570   for ( i=0; i<m; i++ ) {
1571     for ( j=0; j<ourlens[i]; j++ ) {
1572       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1573       jj++;
1574     }
1575   }
1576 
1577   /* create our matrix */
1578   for ( i=0; i<m; i++ ) {
1579     ourlens[i] -= offlens[i];
1580   }
1581   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1582   A = *newmat;
1583   MatSetOption(A,MAT_COLUMNS_SORTED);
1584   for ( i=0; i<m; i++ ) {
1585     ourlens[i] += offlens[i];
1586   }
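  /* ourlens[] was temporarily reduced to the per-row count of diagonal-block
     nonzeros (with offlens[] holding the off-diagonal counts) so the two
     arrays could serve as the d_nnz and o_nnz preallocations; the loop above
     restores the full row lengths for the MatSetValues() loops below.  The
     diagonal/off-diagonal split uses the row range [rstart,rend), which
     matches the diagonal-block column range when rows and columns are split
     identically (as for the square matrices this is typically used on). */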
1587 
1588   if (!rank) {
1589     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1590 
1591     /* read in my part of the matrix numerical values  */
1592     nz = procsnz[0];
1593     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1594 
1595     /* insert into matrix */
1596     jj      = rstart;
1597     smycols = mycols;
1598     svals   = vals;
1599     for ( i=0; i<m; i++ ) {
1600       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1601       smycols += ourlens[i];
1602       svals   += ourlens[i];
1603       jj++;
1604     }
1605 
1606     /* read in the other processors' values and ship them out */
1607     for ( i=1; i<size; i++ ) {
1608       nz = procsnz[i];
1609       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1610       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1611     }
1612     PetscFree(procsnz);
1613   }
1614   else {
1615     /* receive numeric values */
1616     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1617 
1618     /* receive message of values */
1619     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1620     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1621     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:number of values received does not match the row lengths");
1622 
1623     /* insert into matrix */
1624     jj      = rstart;
1625     smycols = mycols;
1626     svals   = vals;
1627     for ( i=0; i<m; i++ ) {
1628       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1629       smycols += ourlens[i];
1630       svals   += ourlens[i];
1631       jj++;
1632     }
1633   }
1634   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1635 
1636   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1637   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638   return 0;
1639 }
1640
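/*
   A hedged usage sketch for loading a matrix through the routine above; the
   binary viewer call reflects this PETSc vintage's interface and may differ
   in detail, and "matrix.dat" is an assumed file name:

       Viewer viewer;
       Mat    A;
       int    ierr;
       ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/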