xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4431cf12e9c6d8190ecf7b4f7131deaab4f8eaf8)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.180 1996/12/18 18:47:24 curfman Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns of the matrix.
13 */
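/*
   The entries of colmap are stored as (local column index + 1) so that a
   value of zero indicates a global column that does not occur in the
   off-diagonal block; callers subtract one after the lookup.
*/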
14 #undef __FUNCTION__
15 #define __FUNCTION__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 
22   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23   PLogObjectMemory(mat,aij->N*sizeof(int));
24   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
26   return 0;
27 }
28 
29 extern int DisAssemble_MPIAIJ(Mat);
30 
31 #undef __FUNCTION__
32 #define __FUNCTION__ "MatGetRowIJ_MPIAIJ"
33 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
34                            PetscTruth *done)
35 {
36   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37   int        ierr;
38   if (aij->size == 1) {
39     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
41   return 0;
42 }
43 
44 #undef __FUNCTION__
45 #define __FUNCTION__ "MatRestoreRowIJ_MPIAIJ"
46 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
47                                PetscTruth *done)
48 {
49   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
50   int        ierr;
51   if (aij->size == 1) {
52     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
54   return 0;
55 }
56 
57 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
58 
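/*
   MatSetValues_MPIAIJ - Values in locally owned rows are routed to either
   the diagonal block (aij->A) or the off-diagonal block (aij->B); values
   destined for rows owned by other processors are stashed and communicated
   later by MatAssemblyBegin()/MatAssemblyEnd().
*/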
59 #undef __FUNCTION__
60 #define __FUNCTION__ "MatSetValues_MPIAIJ"
61 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
62 {
63   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
64   Scalar     value;
65   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
66   int        cstart = aij->cstart, cend = aij->cend,row,col;
67   int        roworiented = aij->roworiented;
68 
69 #if defined(PETSC_BOPT_g)
70   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
71     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
72   }
73 #endif
74   aij->insertmode = addv;
75   for ( i=0; i<m; i++ ) {
76 #if defined(PETSC_BOPT_g)
77     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
78     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
79 #endif
80     if (im[i] >= rstart && im[i] < rend) {
81       row = im[i] - rstart;
82       for ( j=0; j<n; j++ ) {
83         if (in[j] >= cstart && in[j] < cend){
84           col = in[j] - cstart;
85           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
86           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
87         }
88 #if defined(PETSC_BOPT_g)
89         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
90         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
91 #endif
92         else {
93           if (mat->was_assembled) {
94             if (!aij->colmap) {
95               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
96             }
97             col = aij->colmap[in[j]] - 1;
98             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
99               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
100               col =  in[j];
101             }
102           }
103           else col = in[j];
104           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
105           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
106         }
107       }
108     }
109     else {
110       if (roworiented && !aij->donotstash) {
111         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
112       }
113       else {
114         if (!aij->donotstash) {
115           row = im[i];
116           for ( j=0; j<n; j++ ) {
117             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
118           }
119         }
120       }
121     }
122   }
123   return 0;
124 }
125 
126 #undef __FUNCTION__
127 #define __FUNCTION__ "MatGetValues_MPIAIJ"
128 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
129 {
130   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
131   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
132   int        cstart = aij->cstart, cend = aij->cend,row,col;
133 
134   for ( i=0; i<m; i++ ) {
135     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
136     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
137     if (idxm[i] >= rstart && idxm[i] < rend) {
138       row = idxm[i] - rstart;
139       for ( j=0; j<n; j++ ) {
140         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
141         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
142         if (idxn[j] >= cstart && idxn[j] < cend){
143           col = idxn[j] - cstart;
144           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
145         }
146         else {
147           if (!aij->colmap) {
148             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
149           }
150           col = aij->colmap[idxn[j]] - 1;
151           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
152           else {
153             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
154           }
155         }
156       }
157     }
158     else {
159       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
160     }
161   }
162   return 0;
163 }
164 
165 #undef __FUNCTION__
166 #define __FUNCTION__ "MatAssemblyBegin_MPIAIJ"
167 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
168 {
169   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
170   MPI_Comm    comm = mat->comm;
171   int         size = aij->size, *owners = aij->rowners;
172   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
173   MPI_Request *send_waits,*recv_waits;
174   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
175   InsertMode  addv;
176   Scalar      *rvalues,*svalues;
177 
178   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
179   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
180   if (addv == (ADD_VALUES|INSERT_VALUES)) {
181     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
182   }
183   aij->insertmode = addv; /* in case this processor had no cache */
184 
185   /*  first count number of contributors to each processor */
186   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
187   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
188   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
189   for ( i=0; i<aij->stash.n; i++ ) {
190     idx = aij->stash.idx[i];
191     for ( j=0; j<size; j++ ) {
192       if (idx >= owners[j] && idx < owners[j+1]) {
193         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
194       }
195     }
196   }
197   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
198 
199   /* inform other processors of number of messages and max length*/
200   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
201   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
202   nreceives = work[rank];
203   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
204   nmax = work[rank];
205   PetscFree(work);
206 
207   /* post receives:
208        1) each message will consist of ordered triples
209      (global row, global column, value); we store the indices as Scalars
210      to simplify the message passing.
211        2) since we don't know how long each individual message is we
212      allocate the largest needed buffer for each receive. Potentially
213      this is a lot of wasted space.
214 
215 
216        This could be done better.
217   */
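  /*
     Each stashed entry occupies three consecutive Scalars in the buffers,

         buf[3*k]   = (Scalar) global row index
         buf[3*k+1] = (Scalar) global column index
         buf[3*k+2] = numerical value

     hence the factors of 3 in the buffer sizes and message lengths below.
  */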
218   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
219   CHKPTRQ(rvalues);
220   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
221   CHKPTRQ(recv_waits);
222   for ( i=0; i<nreceives; i++ ) {
223     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
224               comm,recv_waits+i);
225   }
226 
227   /* do sends:
228       1) starts[i] gives the starting index in svalues for stuff going to
229          the ith processor
230   */
231   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
232   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
233   CHKPTRQ(send_waits);
234   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
235   starts[0] = 0;
236   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
237   for ( i=0; i<aij->stash.n; i++ ) {
238     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
239     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
240     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
241   }
242   PetscFree(owner);
243   starts[0] = 0;
244   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
245   count = 0;
246   for ( i=0; i<size; i++ ) {
247     if (procs[i]) {
248       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
249                 comm,send_waits+count++);
250     }
251   }
252   PetscFree(starts); PetscFree(nprocs);
253 
254   /* Free cache space */
255   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
256   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
257 
258   aij->svalues    = svalues;    aij->rvalues    = rvalues;
259   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
260   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
261   aij->rmax       = nmax;
262 
263   return 0;
264 }
265 extern int MatSetUpMultiply_MPIAIJ(Mat);
266 
267 #undef __FUNCTION__
268 #define __FUNCTION__ "MatAssemblyEnd_MPIAIJ"
269 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
270 {
271   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
272   MPI_Status  *send_status,recv_status;
273   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
274   int         row,col,other_disassembled;
275   Scalar      *values,val;
276   InsertMode  addv = aij->insertmode;
277 
278   /*  wait on receives */
279   while (count) {
280     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
281     /* unpack receives into our local space */
282     values = aij->rvalues + 3*imdex*aij->rmax;
283     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
284     n = n/3;
285     for ( i=0; i<n; i++ ) {
286       row = (int) PetscReal(values[3*i]) - aij->rstart;
287       col = (int) PetscReal(values[3*i+1]);
288       val = values[3*i+2];
289       if (col >= aij->cstart && col < aij->cend) {
290         col -= aij->cstart;
291         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
292       }
293       else {
294         if (mat->was_assembled || mat->assembled) {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298           col = aij->colmap[col] - 1;
299           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
300             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
301             col = (int) PetscReal(values[3*i+1]);
302           }
303         }
304         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
305       }
306     }
307     count--;
308   }
309   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
310 
311   /* wait on sends */
312   if (aij->nsends) {
313     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
314     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
315     PetscFree(send_status);
316   }
317   PetscFree(aij->send_waits); PetscFree(aij->svalues);
318 
319   aij->insertmode = NOT_SET_VALUES;
320   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
322 
323   /* determine if any processor has disassembled; if so we must
324      also disassemble ourselves, in order that we may reassemble. */
325   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
326   if (mat->was_assembled && !other_disassembled) {
327     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
328   }
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
332   }
333   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
334   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
335 
336   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
337   return 0;
338 }
339 
340 #undef __FUNCTION__
341 #define __FUNCTION__ "MatZeroEntries_MPIAIJ"
342 static int MatZeroEntries_MPIAIJ(Mat A)
343 {
344   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
345   int        ierr;
346   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
347   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
348   return 0;
349 }
350 
351 /* the code does not do the diagonal entries correctly unless the
352    matrix is square and the column and row ownerships are identical.
353    This is a BUG. The only way to fix it seems to be to access
354    aij->A and aij->B directly and not through the MatZeroRows()
355    routine.
356 */
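/*
   MatZeroRows_MPIAIJ - Each processor sends the global row numbers it was
   given to the processors that own those rows; after the exchange every
   processor zeros its own rows in both the diagonal block (inserting the
   optional diagonal value) and the off-diagonal block.
*/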
357 #undef __FUNCTION__
358 #define __FUNCTION__ "MatZeroRows_MPIAIJ"
359 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
360 {
361   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
362   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
363   int            *procs,*nprocs,j,found,idx,nsends,*work;
364   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
365   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
366   int            *lens,imdex,*lrows,*values;
367   MPI_Comm       comm = A->comm;
368   MPI_Request    *send_waits,*recv_waits;
369   MPI_Status     recv_status,*send_status;
370   IS             istmp;
371 
372   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
373   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
374 
375   /*  first count number of contributors to each processor */
376   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
377   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
378   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
379   for ( i=0; i<N; i++ ) {
380     idx = rows[i];
381     found = 0;
382     for ( j=0; j<size; j++ ) {
383       if (idx >= owners[j] && idx < owners[j+1]) {
384         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
385       }
386     }
387     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
388   }
389   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
390 
391   /* inform other processors of number of messages and max length*/
392   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
393   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
394   nrecvs = work[rank];
395   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
396   nmax = work[rank];
397   PetscFree(work);
398 
399   /* post receives:   */
400   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
401   CHKPTRQ(rvalues);
402   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
403   CHKPTRQ(recv_waits);
404   for ( i=0; i<nrecvs; i++ ) {
405     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
406   }
407 
408   /* do sends:
409       1) starts[i] gives the starting index in svalues for stuff going to
410          the ith processor
411   */
412   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
413   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
414   CHKPTRQ(send_waits);
415   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
416   starts[0] = 0;
417   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
418   for ( i=0; i<N; i++ ) {
419     svalues[starts[owner[i]]++] = rows[i];
420   }
421   ISRestoreIndices(is,&rows);
422 
423   starts[0] = 0;
424   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
425   count = 0;
426   for ( i=0; i<size; i++ ) {
427     if (procs[i]) {
428       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
429     }
430   }
431   PetscFree(starts);
432 
433   base = owners[rank];
434 
435   /*  wait on receives */
436   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
437   source = lens + nrecvs;
438   count  = nrecvs; slen = 0;
439   while (count) {
440     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
441     /* unpack receives into our local space */
442     MPI_Get_count(&recv_status,MPI_INT,&n);
443     source[imdex]  = recv_status.MPI_SOURCE;
444     lens[imdex]  = n;
445     slen += n;
446     count--;
447   }
448   PetscFree(recv_waits);
449 
450   /* gather the received global row numbers into the local list of rows to zero */
451   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
452   count = 0;
453   for ( i=0; i<nrecvs; i++ ) {
454     values = rvalues + i*nmax;
455     for ( j=0; j<lens[i]; j++ ) {
456       lrows[count++] = values[j] - base;
457     }
458   }
459   PetscFree(rvalues); PetscFree(lens);
460   PetscFree(owner); PetscFree(nprocs);
461 
462   /* actually zap the local rows */
463   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
464   PLogObjectParent(A,istmp);
465   PetscFree(lrows);
466   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
467   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
468   ierr = ISDestroy(istmp); CHKERRQ(ierr);
469 
470   /* wait on sends */
471   if (nsends) {
472     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
473     CHKPTRQ(send_status);
474     MPI_Waitall(nsends,send_waits,send_status);
475     PetscFree(send_status);
476   }
477   PetscFree(send_waits); PetscFree(svalues);
478 
479   return 0;
480 }
481 
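/*
   MatMult_MPIAIJ - Computes y = A x by overlapping the scatter of the
   required off-processor entries of x (into a->lvec) with the multiply by
   the diagonal block, i.e. y = a->A * x followed by y += a->B * a->lvec.
*/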
482 #undef __FUNCTION__
483 #define __FUNCTION__ "MatMult_MPIAIJ"
484 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
485 {
486   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487   int        ierr,nt;
488 
489   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
490   if (nt != a->n) {
491     SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
492   }
493   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
494   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
495   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
496   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
497   return 0;
498 }
499 
500 #undef __FUNCTION__
501 #define __FUNCTION__ "MatMultAdd_MPIAIJ"
502 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
505   int        ierr;
506   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
507   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
508   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
509   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
510   return 0;
511 }
512 
513 #undef __FUNCTION__
514 #define __FUNCTION__ "MatMultTrans_MPIAIJ"
515 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
516 {
517   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
518   int        ierr;
519 
520   /* do nondiagonal part */
521   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
522   /* send it on its way */
523   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
524   /* do local part */
525   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
526   /* receive remote parts: note this assumes the values are not actually */
527   /* inserted in yy until the next line, which is true for my implementation*/
528   /* but may not always be true. */
529   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
530   return 0;
531 }
532 
533 #undef __FUNCTION__
534 #define __FUNCTION__ "MatMultTransAdd_MPIAIJ"
535 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
536 {
537   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
538   int        ierr;
539 
540   /* do nondiagonal part */
541   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
542   /* send it on its way */
543   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
544   /* do local part */
545   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
546   /* receive remote parts: note this assumes the values are not actually */
547   /* inserted in zz until the next line, which is true for my implementation*/
548   /* but may not always be true. */
549   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
550   return 0;
551 }
552 
553 /*
554   This only works correctly for square matrices where the subblock A->A is the
555    diagonal block
556 */
557 #undef __FUNCTION__
558 #define __FUNCTION__ "MatGetDiagonal_MPIAIJ"
559 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
560 {
561   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
562   if (a->M != a->N)
563     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
564   if (a->rstart != a->cstart || a->rend != a->cend) {
565     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
566   return MatGetDiagonal(a->A,v);
567 }
568 
569 #undef __FUNCTION__
570 #define __FUNCTION__ "MatScale_MPIAIJ"
571 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr;
575   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
576   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
577   return 0;
578 }
579 
580 #undef __FUNCTION__
581 #define __FUNCTION__ "MatDestroy_MPIAIJ"
582 static int MatDestroy_MPIAIJ(PetscObject obj)
583 {
584   Mat        mat = (Mat) obj;
585   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
586   int        ierr;
587 #if defined(PETSC_LOG)
588   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
589 #endif
590   PetscFree(aij->rowners);
591   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
592   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
593   if (aij->colmap) PetscFree(aij->colmap);
594   if (aij->garray) PetscFree(aij->garray);
595   if (aij->lvec)   VecDestroy(aij->lvec);
596   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
597   if (aij->rowvalues) PetscFree(aij->rowvalues);
598   PetscFree(aij);
599   if (mat->mapping) {
600     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
601   }
602   PLogObjectDestroy(mat);
603   PetscHeaderDestroy(mat);
604   return 0;
605 }
606 #include "draw.h"
607 #include "pinclude/pviewer.h"
608 
609 #undef __FUNCTION__
610 #define __FUNCTION__ "MatView_MPIAIJ_Binary"
611 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
612 {
613   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
614   int         ierr;
615 
616   if (aij->size == 1) {
617     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
618   }
619   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
620   return 0;
621 }
622 
623 #undef __FUNCTION__
624 #define __FUNCTION__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
625 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
626 {
627   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
628   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
629   int         ierr, format,shift = C->indexshift,rank;
630   FILE        *fd;
631   ViewerType  vtype;
632 
633   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
634   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
635     ierr = ViewerGetFormat(viewer,&format);
636     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
637       MatInfo info;
638       int     flg;
639       MPI_Comm_rank(mat->comm,&rank);
640       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
641       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
642       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
643       PetscSequentialPhaseBegin(mat->comm,1);
644       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
645          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
646       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
647          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
648       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
649       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
650       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
651       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
652       fflush(fd);
653       PetscSequentialPhaseEnd(mat->comm,1);
654       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
655       return 0;
656     }
657     else if (format == VIEWER_FORMAT_ASCII_INFO) {
658       return 0;
659     }
660   }
661 
662   if (vtype == DRAW_VIEWER) {
663     Draw       draw;
664     PetscTruth isnull;
665     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
666     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
667   }
668 
669   if (vtype == ASCII_FILE_VIEWER) {
670     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
671     PetscSequentialPhaseBegin(mat->comm,1);
672     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
673            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
674            aij->cend);
675     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
676     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
677     fflush(fd);
678     PetscSequentialPhaseEnd(mat->comm,1);
679   }
680   else {
681     int size = aij->size;
682     rank = aij->rank;
683     if (size == 1) {
684       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
685     }
686     else {
687       /* assemble the entire matrix onto first processor. */
688       Mat         A;
689       Mat_SeqAIJ *Aloc;
690       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
691       Scalar      *a;
692 
693       if (!rank) {
694         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
695                CHKERRQ(ierr);
696       }
697       else {
698         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
699                CHKERRQ(ierr);
700       }
701       PLogObjectParent(mat,A);
702 
703       /* copy over the A part */
704       Aloc = (Mat_SeqAIJ*) aij->A->data;
705       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
706       row = aij->rstart;
707       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
708       for ( i=0; i<m; i++ ) {
709         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
710         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
711       }
712       aj = Aloc->j;
713       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
714 
715       /* copy over the B part */
716       Aloc = (Mat_SeqAIJ*) aij->B->data;
717       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
718       row = aij->rstart;
719       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
720       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
721       for ( i=0; i<m; i++ ) {
722         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
723         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
724       }
725       PetscFree(ct);
726       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
727       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
728       if (!rank) {
729         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
730       }
731       ierr = MatDestroy(A); CHKERRQ(ierr);
732     }
733   }
734   return 0;
735 }
736 
737 #undef __FUNCTION__
738 #define __FUNCTION__ "MatView_MPIAIJ"
739 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
740 {
741   Mat         mat = (Mat) obj;
742   int         ierr;
743   ViewerType  vtype;
744 
745   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
746   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
747       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
748     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
749   }
750   else if (vtype == BINARY_FILE_VIEWER) {
751     return MatView_MPIAIJ_Binary(mat,viewer);
752   }
753   return 0;
754 }
755 
756 /*
757     This has to provide several versions.
758 
759      2) a) use only local smoothing, updating outer values only once.
760         b) local smoothing, updating outer values each inner iteration.
761      3) coloring, updating outer values between colors.
762 */
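/*
   Currently only the SOR_LOCAL_* sweeps are handled: each processor relaxes
   over its own rows, using the most recently scattered ghost values
   (mat->lvec) for the off-diagonal block contribution.
*/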
763 #undef __FUNCTION__
764 #define __FUNCTION__ "MatRelax_MPIAIJ"
765 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
766                            double fshift,int its,Vec xx)
767 {
768   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
769   Mat        AA = mat->A, BB = mat->B;
770   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
771   Scalar     *b,*x,*xs,*ls,d,*v,sum;
772   int        ierr,*idx, *diag;
773   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
774 
775   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
776   xs = x + shift; /* shift by one for index start of 1 */
777   ls = ls + shift;
778   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
779   diag = A->diag;
780   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
781     if (flag & SOR_ZERO_INITIAL_GUESS) {
782       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
783     }
784     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
785     CHKERRQ(ierr);
786     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
787     CHKERRQ(ierr);
788     while (its--) {
789       /* go down through the rows */
790       for ( i=0; i<m; i++ ) {
791         n    = A->i[i+1] - A->i[i];
792         idx  = A->j + A->i[i] + shift;
793         v    = A->a + A->i[i] + shift;
794         sum  = b[i];
795         SPARSEDENSEMDOT(sum,xs,v,idx,n);
796         d    = fshift + A->a[diag[i]+shift];
797         n    = B->i[i+1] - B->i[i];
798         idx  = B->j + B->i[i] + shift;
799         v    = B->a + B->i[i] + shift;
800         SPARSEDENSEMDOT(sum,ls,v,idx,n);
801         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
802       }
803       /* come up through the rows */
804       for ( i=m-1; i>-1; i-- ) {
805         n    = A->i[i+1] - A->i[i];
806         idx  = A->j + A->i[i] + shift;
807         v    = A->a + A->i[i] + shift;
808         sum  = b[i];
809         SPARSEDENSEMDOT(sum,xs,v,idx,n);
810         d    = fshift + A->a[diag[i]+shift];
811         n    = B->i[i+1] - B->i[i];
812         idx  = B->j + B->i[i] + shift;
813         v    = B->a + B->i[i] + shift;
814         SPARSEDENSEMDOT(sum,ls,v,idx,n);
815         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
816       }
817     }
818   }
819   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
820     if (flag & SOR_ZERO_INITIAL_GUESS) {
821       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
822     }
823     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
824     CHKERRQ(ierr);
825     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
826     CHKERRQ(ierr);
827     while (its--) {
828       for ( i=0; i<m; i++ ) {
829         n    = A->i[i+1] - A->i[i];
830         idx  = A->j + A->i[i] + shift;
831         v    = A->a + A->i[i] + shift;
832         sum  = b[i];
833         SPARSEDENSEMDOT(sum,xs,v,idx,n);
834         d    = fshift + A->a[diag[i]+shift];
835         n    = B->i[i+1] - B->i[i];
836         idx  = B->j + B->i[i] + shift;
837         v    = B->a + B->i[i] + shift;
838         SPARSEDENSEMDOT(sum,ls,v,idx,n);
839         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
840       }
841     }
842   }
843   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
844     if (flag & SOR_ZERO_INITIAL_GUESS) {
845       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
846     }
847     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
848                             mat->Mvctx); CHKERRQ(ierr);
849     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
850                             mat->Mvctx); CHKERRQ(ierr);
851     while (its--) {
852       for ( i=m-1; i>-1; i-- ) {
853         n    = A->i[i+1] - A->i[i];
854         idx  = A->j + A->i[i] + shift;
855         v    = A->a + A->i[i] + shift;
856         sum  = b[i];
857         SPARSEDENSEMDOT(sum,xs,v,idx,n);
858         d    = fshift + A->a[diag[i]+shift];
859         n    = B->i[i+1] - B->i[i];
860         idx  = B->j + B->i[i] + shift;
861         v    = B->a + B->i[i] + shift;
862         SPARSEDENSEMDOT(sum,ls,v,idx,n);
863         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
864       }
865     }
866   }
867   else {
868     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
869   }
870   return 0;
871 }
872 
873 #undef __FUNCTION__
874 #define __FUNCTION__ "MatGetInfo_MPIAIJ"
875 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
876 {
877   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
878   Mat        A = mat->A, B = mat->B;
879   int        ierr;
880   double     isend[5], irecv[5];
881 
882   info->rows_global    = (double)mat->M;
883   info->columns_global = (double)mat->N;
884   info->rows_local     = (double)mat->m;
885   info->columns_local  = (double)mat->N;
886   info->block_size     = 1.0;
887   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
888   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
889   isend[3] = info->memory;  isend[4] = info->mallocs;
890   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
891   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
892   isend[3] += info->memory;  isend[4] += info->mallocs;
893   if (flag == MAT_LOCAL) {
894     info->nz_used      = isend[0];
895     info->nz_allocated = isend[1];
896     info->nz_unneeded  = isend[2];
897     info->memory       = isend[3];
898     info->mallocs      = isend[4];
899   } else if (flag == MAT_GLOBAL_MAX) {
900     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
901     info->nz_used      = irecv[0];
902     info->nz_allocated = irecv[1];
903     info->nz_unneeded  = irecv[2];
904     info->memory       = irecv[3];
905     info->mallocs      = irecv[4];
906   } else if (flag == MAT_GLOBAL_SUM) {
907     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
908     info->nz_used      = irecv[0];
909     info->nz_allocated = irecv[1];
910     info->nz_unneeded  = irecv[2];
911     info->memory       = irecv[3];
912     info->mallocs      = irecv[4];
913   }
914   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
915   info->fill_ratio_needed = 0;
916   info->factor_mallocs    = 0;
917 
918   return 0;
919 }
920 
921 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
922 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
923 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
924 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
925 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
926 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
927 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
928 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
929 
930 #undef __FUNCTION__
931 #define __FUNCTION__ "MatSetOption_MPIAIJ"
932 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
933 {
934   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
935 
936   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
937       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
938       op == MAT_COLUMNS_UNSORTED ||
939       op == MAT_COLUMNS_SORTED) {
940         MatSetOption(a->A,op);
941         MatSetOption(a->B,op);
942   } else if (op == MAT_ROW_ORIENTED) {
943     a->roworiented = 1;
944     MatSetOption(a->A,op);
945     MatSetOption(a->B,op);
946   } else if (op == MAT_ROWS_SORTED ||
947              op == MAT_ROWS_UNSORTED ||
948              op == MAT_SYMMETRIC ||
949              op == MAT_STRUCTURALLY_SYMMETRIC ||
950              op == MAT_YES_NEW_DIAGONALS)
951     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
952   else if (op == MAT_COLUMN_ORIENTED) {
953     a->roworiented = 0;
954     MatSetOption(a->A,op);
955     MatSetOption(a->B,op);
956   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
957     a->donotstash = 1;
958   } else if (op == MAT_NO_NEW_DIAGONALS)
959     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
960   else
961     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
962   return 0;
963 }
964 
965 #undef __FUNCTION__
966 #define __FUNCTION__ "MatGetSize_MPIAIJ"
967 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
968 {
969   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
970   *m = mat->M; *n = mat->N;
971   return 0;
972 }
973 
974 #undef __FUNCTION__
975 #define __FUNCTION__ "MatGetLocalSize_MPIAIJ"
976 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
977 {
978   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
979   *m = mat->m; *n = mat->N;
980   return 0;
981 }
982 
983 #undef __FUNCTION__
984 #define __FUNCTION__ "MatGetOwnershipRange_MPIAIJ"
985 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
986 {
987   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
988   *m = mat->rstart; *n = mat->rend;
989   return 0;
990 }
991 
992 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
993 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
994 
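/*
   MatGetRow_MPIAIJ - Returns a locally owned row with global column indices,
   merging the diagonal block (mat->A) and the off-diagonal block (mat->B,
   whose local column numbers are translated through garray) so that the
   indices appear in increasing order.
*/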
995 #undef __FUNCTION__
996 #define __FUNCTION__ "MatGetRow_MPIAIJ"
997 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
998 {
999   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1000   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1001   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1002   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1003   int        *cmap, *idx_p;
1004 
1005   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
1006   mat->getrowactive = PETSC_TRUE;
1007 
1008   if (!mat->rowvalues && (idx || v)) {
1009     /*
1010         allocate enough space to hold information from the longest row.
1011     */
1012     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1013     int     max = 1,m = mat->m,tmp;
1014     for ( i=0; i<m; i++ ) {
1015       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1016       if (max < tmp) { max = tmp; }
1017     }
1018     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1019     CHKPTRQ(mat->rowvalues);
1020     mat->rowindices = (int *) (mat->rowvalues + max);
1021   }
1022 
1023   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
1024   lrow = row - rstart;
1025 
1026   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1027   if (!v)   {pvA = 0; pvB = 0;}
1028   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1029   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1030   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1031   nztot = nzA + nzB;
1032 
1033   cmap  = mat->garray;
1034   if (v  || idx) {
1035     if (nztot) {
1036       /* Sort by increasing column numbers, assuming A and B already sorted */
1037       int imark = -1;
1038       if (v) {
1039         *v = v_p = mat->rowvalues;
1040         for ( i=0; i<nzB; i++ ) {
1041           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1042           else break;
1043         }
1044         imark = i;
1045         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1046         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1047       }
1048       if (idx) {
1049         *idx = idx_p = mat->rowindices;
1050         if (imark > -1) {
1051           for ( i=0; i<imark; i++ ) {
1052             idx_p[i] = cmap[cworkB[i]];
1053           }
1054         } else {
1055           for ( i=0; i<nzB; i++ ) {
1056             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1057             else break;
1058           }
1059           imark = i;
1060         }
1061         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1062         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1063       }
1064     }
1065     else {
1066       if (idx) *idx = 0;
1067       if (v)   *v   = 0;
1068     }
1069   }
1070   *nz = nztot;
1071   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1072   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1073   return 0;
1074 }
1075 
1076 #undef __FUNCTION__
1077 #define __FUNCTION__ "MatRestoreRow_MPIAIJ"
1078 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1079 {
1080   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1081   if (aij->getrowactive == PETSC_FALSE) {
1082     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
1083   }
1084   aij->getrowactive = PETSC_FALSE;
1085   return 0;
1086 }
1087 
1088 #undef __FUNCTION__
1089 #define __FUNCTION__ "MatNorm_MPIAIJ"
1090 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1091 {
1092   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1093   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1094   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1095   double     sum = 0.0;
1096   Scalar     *v;
1097 
1098   if (aij->size == 1) {
1099     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1100   } else {
1101     if (type == NORM_FROBENIUS) {
1102       v = amat->a;
1103       for (i=0; i<amat->nz; i++ ) {
1104 #if defined(PETSC_COMPLEX)
1105         sum += real(conj(*v)*(*v)); v++;
1106 #else
1107         sum += (*v)*(*v); v++;
1108 #endif
1109       }
1110       v = bmat->a;
1111       for (i=0; i<bmat->nz; i++ ) {
1112 #if defined(PETSC_COMPLEX)
1113         sum += real(conj(*v)*(*v)); v++;
1114 #else
1115         sum += (*v)*(*v); v++;
1116 #endif
1117       }
1118       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1119       *norm = sqrt(*norm);
1120     }
1121     else if (type == NORM_1) { /* max column norm */
1122       double *tmp, *tmp2;
1123       int    *jj, *garray = aij->garray;
1124       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1125       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1126       PetscMemzero(tmp,aij->N*sizeof(double));
1127       *norm = 0.0;
1128       v = amat->a; jj = amat->j;
1129       for ( j=0; j<amat->nz; j++ ) {
1130         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1131       }
1132       v = bmat->a; jj = bmat->j;
1133       for ( j=0; j<bmat->nz; j++ ) {
1134         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1135       }
1136       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1137       for ( j=0; j<aij->N; j++ ) {
1138         if (tmp2[j] > *norm) *norm = tmp2[j];
1139       }
1140       PetscFree(tmp); PetscFree(tmp2);
1141     }
1142     else if (type == NORM_INFINITY) { /* max row norm */
1143       double ntemp = 0.0;
1144       for ( j=0; j<amat->m; j++ ) {
1145         v = amat->a + amat->i[j] + shift;
1146         sum = 0.0;
1147         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1148           sum += PetscAbsScalar(*v); v++;
1149         }
1150         v = bmat->a + bmat->i[j] + shift;
1151         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1152           sum += PetscAbsScalar(*v); v++;
1153         }
1154         if (sum > ntemp) ntemp = sum;
1155       }
1156       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1157     }
1158     else {
1159       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1160     }
1161   }
1162   return 0;
1163 }
1164 
1165 #undef __FUNCTION__
1166 #define __FUNCTION__ "MatTranspose_MPIAIJ"
1167 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1170   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1171   int        ierr,shift = Aloc->indexshift;
1172   Mat        B;
1173   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1174   Scalar     *array;
1175 
1176   if (matout == PETSC_NULL && M != N)
1177     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1178   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1179          PETSC_NULL,&B); CHKERRQ(ierr);
1180 
1181   /* copy over the A part */
1182   Aloc = (Mat_SeqAIJ*) a->A->data;
1183   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1184   row = a->rstart;
1185   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1186   for ( i=0; i<m; i++ ) {
1187     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1188     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1189   }
1190   aj = Aloc->j;
1191   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1192 
1193   /* copy over the B part */
1194   Aloc = (Mat_SeqAIJ*) a->B->data;
1195   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1196   row = a->rstart;
1197   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1198   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1199   for ( i=0; i<m; i++ ) {
1200     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1201     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1202   }
1203   PetscFree(ct);
1204   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1205   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1206   if (matout != PETSC_NULL) {
1207     *matout = B;
1208   } else {
1209     /* This isn't really an in-place transpose, but we free the data structures of a and copy the new matrix over A */
1210     PetscFree(a->rowners);
1211     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1212     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1213     if (a->colmap) PetscFree(a->colmap);
1214     if (a->garray) PetscFree(a->garray);
1215     if (a->lvec) VecDestroy(a->lvec);
1216     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1217     PetscFree(a);
1218     PetscMemcpy(A,B,sizeof(struct _Mat));
1219     PetscHeaderDestroy(B);
1220   }
1221   return 0;
1222 }
1223 
1224 #undef __FUNCTION__
1225 #define __FUNCTION__ "MatDiagonalScale_MPIAIJ"
1226 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1227 {
1228   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1229   Mat a = aij->A, b = aij->B;
1230   int ierr,s1,s2,s3;
1231 
1232   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1233   if (rr) {
1234     s3 = aij->n;
1235     VecGetLocalSize_Fast(rr,s1);
1236     if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
1237     /* Overlap communication with computation. */
1238     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1239   }
1240   if (ll) {
1241     VecGetLocalSize_Fast(ll,s1);
1242     if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
1243     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1244   }
1245   /* scale  the diagonal block */
1246   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1247 
1248   if (rr) {
1249     /* Do a scatter end and then right scale the off-diagonal block */
1250     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1251     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1252   }
1253 
1254   return 0;
1255 }
1256 
1257 
1258 extern int MatPrintHelp_SeqAIJ(Mat);
1259 #undef __FUNCTION__
1260 #define __FUNCTION__ "MatPrintHelp_MPIAIJ"
1261 static int MatPrintHelp_MPIAIJ(Mat A)
1262 {
1263   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1264 
1265   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1266   else return 0;
1267 }
1268 
1269 #undef __FUNCTION__
1270 #define __FUNCTION__ "MatGetBlockSize_MPIAIJ"
1271 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1272 {
1273   *bs = 1;
1274   return 0;
1275 }
1276 
1277 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1278 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1279 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1280 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1281 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1282 /* -------------------------------------------------------------------*/
1283 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1284        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1285        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1286        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1287        0,0,
1288        0,0,
1289        0,0,
1290        MatRelax_MPIAIJ,
1291        MatTranspose_MPIAIJ,
1292        MatGetInfo_MPIAIJ,0,
1293        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1294        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1295        0,
1296        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1297        0,0,0,0,
1298        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1299        0,0,
1300        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1301        0,0,0,
1302        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1303        MatPrintHelp_MPIAIJ,
1304        MatScale_MPIAIJ,0,0,0,
1305        MatGetBlockSize_MPIAIJ,0,0,0,0,
1306        MatFDColoringCreate_MPIAIJ};
1307 
1308 
1309 #undef __FUNCTION__
1310 #define __FUNCTION__ "MatCreateMPIAIJ"
1311 /*@C
1312    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1313    (the default parallel PETSc format).  For good matrix assembly performance
1314    the user should preallocate the matrix storage by setting the parameters
1315    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1316    performance can be increased by more than a factor of 50.
1317 
1318    Input Parameters:
1319 .  comm - MPI communicator
1320 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1321            This value should be the same as the local size used in creating the
1322            y vector for the matrix-vector product y = Ax.
1323 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1324            This value should be the same as the local size used in creating the
1325            x vector for the matrix-vector product y = Ax.
1326 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1327 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1328 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1329            (same for all local rows)
1330 .  d_nnz - array containing the number of nonzeros in the various rows of the
1331            diagonal portion of the local submatrix (possibly different for each row)
1332            or PETSC_NULL. You must leave room for the diagonal entry even if
1333            it is zero.
1334 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1335            submatrix (same for all local rows).
1336 .  o_nnz - array containing the number of nonzeros in the various rows of the
1337            off-diagonal portion of the local submatrix (possibly different for
1338            each row) or PETSC_NULL.
1339 
1340    Output Parameter:
1341 .  A - the matrix
1342 
1343    Notes:
1344    The AIJ format (also called the Yale sparse matrix format or
1345    compressed row storage), is fully compatible with standard Fortran 77
1346    storage.  That is, the stored row and column indices can begin at
1347    either one (as in Fortran) or zero.  See the users manual for details.
1348 
1349    The user MUST specify either the local or global matrix dimensions
1350    (possibly both).
1351 
1352    By default, this format uses inodes (identical nodes) when possible.
1353    We search for consecutive rows with the same nonzero structure, thereby
1354    reusing matrix information to achieve increased efficiency.
1355 
1356    Options Database Keys:
1357 $    -mat_aij_no_inode  - Do not use inodes
1358 $    -mat_aij_inode_limit <limit> - Set inode limit.
1359 $        (max limit=5)
1360 $    -mat_aij_oneindex - Internally use indexing starting at 1
1361 $        rather than 0.  Note: When calling MatSetValues(),
1362 $        the user still MUST index entries starting at 0!
1363 
1364    Storage Information:
1365    For a square global matrix we define each processor's diagonal portion
1366    to be its local rows and the corresponding columns (a square submatrix);
1367    each processor's off-diagonal portion encompasses the remainder of the
1368    local matrix (a rectangular submatrix).
1369 
1370    The user can specify preallocated storage for the diagonal part of
1371    the local submatrix with either d_nz or d_nnz (not both).  Set
1372    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1373    memory allocation.  Likewise, specify preallocated storage for the
1374    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1375 
1376    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1377    the figure below we depict these three local rows and all columns (0-11).
1378 
1379 $          0 1 2 3 4 5 6 7 8 9 10 11
1380 $         -------------------
1381 $  row 3  |  o o o d d d o o o o o o
1382 $  row 4  |  o o o d d d o o o o o o
1383 $  row 5  |  o o o d d d o o o o o o
1384 $         -------------------
1385 $
1386 
1387    Thus, any entries in the d locations are stored in the d (diagonal)
1388    submatrix, and any entries in the o locations are stored in the
1389    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1390    stored simply in the MATSEQAIJ format for compressed row storage.
1391 
1392    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1393    and o_nz should indicate the number of nonzeros per row in the o matrix.
1394    In general, for PDE problems in which most nonzeros are near the diagonal,
1395    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1396    or you will get TERRIBLE performance; see the users' manual chapter on
1397    matrices.
1398 
1399 .keywords: matrix, aij, compressed row, sparse, parallel
1400 
1401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1402 @*/
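/*
   Example usage (a minimal sketch, not taken from this file; the matrix
   size and the preallocation values 3 and 2 are purely illustrative):

       Mat    A;
       int    ierr, i, rstart, rend, col[3];
       Scalar v[3];

       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              3,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
       for ( i=rstart; i<rend; i++ ) {
         v[0] = -1.0; v[1] = 2.0; v[2] = -1.0;
         col[0] = i-1; col[1] = i; col[2] = i+1;
         if (i == 0) {
           ierr = MatSetValues(A,1,&i,2,&col[1],&v[1],INSERT_VALUES);CHKERRQ(ierr);
         } else if (i == 99) {
           ierr = MatSetValues(A,1,&i,2,col,v,INSERT_VALUES);CHKERRQ(ierr);
         } else {
           ierr = MatSetValues(A,1,&i,3,col,v,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/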
1403 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1404                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1405 {
1406   Mat          B;
1407   Mat_MPIAIJ   *b;
1408   int          ierr, i,sum[2],work[2],size;
1409 
1410   *A = 0;
1411   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1412   PLogObjectCreate(B);
1413   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1414   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1415   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1416   MPI_Comm_size(comm,&size);
1417   if (size == 1) {
1418     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1419     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1420     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1421     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1422     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1423     B->ops.solve             = MatSolve_MPIAIJ;
1424     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1425     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1426     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1427     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1428   }
1429   B->destroy    = MatDestroy_MPIAIJ;
1430   B->view       = MatView_MPIAIJ;
1431   B->factor     = 0;
1432   B->assembled  = PETSC_FALSE;
1433   B->mapping    = 0;
1434 
1435   b->insertmode = NOT_SET_VALUES;
1436   MPI_Comm_rank(comm,&b->rank);
1437   MPI_Comm_size(comm,&b->size);
1438 
1439   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1440     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1441 
1442   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1443     work[0] = m; work[1] = n;
1444     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1445     if (M == PETSC_DECIDE) M = sum[0];
1446     if (N == PETSC_DECIDE) N = sum[1];
1447   }
1448   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1449   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1450   b->m = m; B->m = m;
1451   b->n = n; B->n = n;
1452   b->N = N; B->N = N;
1453   b->M = M; B->M = M;
1454 
1455   /* build local table of row and column ownerships */
1456   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1457   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1458   b->cowners = b->rowners + b->size + 2;
1459   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1460   b->rowners[0] = 0;
1461   for ( i=2; i<=b->size; i++ ) {
1462     b->rowners[i] += b->rowners[i-1];
1463   }
1464   b->rstart = b->rowners[b->rank];
1465   b->rend   = b->rowners[b->rank+1];
1466   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1467   b->cowners[0] = 0;
1468   for ( i=2; i<=b->size; i++ ) {
1469     b->cowners[i] += b->cowners[i-1];
1470   }
1471   b->cstart = b->cowners[b->rank];
1472   b->cend   = b->cowners[b->rank+1];
1473 
1474   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1475   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1476   PLogObjectParent(B,b->A);
1477   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1478   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1479   PLogObjectParent(B,b->B);
1480 
1481   /* build cache (stash) for entries destined for rows owned by other processors */
1482   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1483   b->donotstash  = 0;
1484   b->colmap      = 0;
1485   b->garray      = 0;
1486   b->roworiented = 1;
1487 
1488   /* stuff used for matrix-vector multiply */
1489   b->lvec      = 0;
1490   b->Mvctx     = 0;
1491 
1492   /* stuff for MatGetRow() */
1493   b->rowindices   = 0;
1494   b->rowvalues    = 0;
1495   b->getrowactive = PETSC_FALSE;
1496 
1497   *A = B;
1498   return 0;
1499 }
1500 
1501 #undef __FUNCTION__
1502 #define __FUNCTION__ "MatConvertSameType_MPIAIJ"
1503 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1504 {
1505   Mat        mat;
1506   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1507   int        ierr, len=0, flg;
1508 
1509   *newmat       = 0;
1510   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1511   PLogObjectCreate(mat);
1512   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1513   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1514   mat->destroy    = MatDestroy_MPIAIJ;
1515   mat->view       = MatView_MPIAIJ;
1516   mat->factor     = matin->factor;
1517   mat->assembled  = PETSC_TRUE;
1518 
1519   a->m = mat->m   = oldmat->m;
1520   a->n = mat->n   = oldmat->n;
1521   a->M = mat->M   = oldmat->M;
1522   a->N = mat->N   = oldmat->N;
1523 
1524   a->rstart       = oldmat->rstart;
1525   a->rend         = oldmat->rend;
1526   a->cstart       = oldmat->cstart;
1527   a->cend         = oldmat->cend;
1528   a->size         = oldmat->size;
1529   a->rank         = oldmat->rank;
1530   a->insertmode   = NOT_SET_VALUES;
1531   a->rowvalues    = 0;
1532   a->getrowactive = PETSC_FALSE;
1533 
1534   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1535   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1536   a->cowners = a->rowners + a->size + 2;
1537   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1538   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1539   if (oldmat->colmap) {
1540     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1541     PLogObjectMemory(mat,(a->N)*sizeof(int));
1542     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1543   } else a->colmap = 0;
1544   if (oldmat->garray) {
1545     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1546     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1547     PLogObjectMemory(mat,len*sizeof(int));
1548     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1549   } else a->garray = 0;
1550 
1551   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1552   PLogObjectParent(mat,a->lvec);
1553   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1554   PLogObjectParent(mat,a->Mvctx);
1555   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1556   PLogObjectParent(mat,a->A);
1557   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1558   PLogObjectParent(mat,a->B);
1559   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1560   if (flg) {
1561     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1562   }
1563   *newmat = mat;
1564   return 0;
1565 }
1566 
1567 #include "sys.h"
1568 
1569 #undef __FUNCTION__
1570 #define __FUNCTION__ "MatLoad_MPIAIJ"
1571 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1572 {
1573   Mat          A;
1574   int          i, nz, ierr, j,rstart, rend, fd;
1575   Scalar       *vals,*svals;
1576   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1577   MPI_Status   status;
1578   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1579   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1580   int          tag = ((PetscObject)viewer)->tag;
1581 
1582   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1583   if (!rank) {
1584     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1585     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1586     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1587   }
1588 
1589   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1590   M = header[1]; N = header[2];
1591   /* determine ownership of all rows */
1592   m = M/size + ((M % size) > rank);
1593   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1594   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1595   rowners[0] = 0;
1596   for ( i=2; i<=size; i++ ) {
1597     rowners[i] += rowners[i-1];
1598   }
1599   rstart = rowners[rank];
1600   rend   = rowners[rank+1];
1601 
1602   /* distribute row lengths to all processors */
1603   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1604   offlens = ourlens + (rend-rstart);
1605   if (!rank) {
1606     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1607     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1608     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1609     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1610     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1611     PetscFree(sndcounts);
1612   }
1613   else {
1614     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1615   }
1616 
1617   if (!rank) {
1618     /* calculate the number of nonzeros on each processor */
1619     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1620     PetscMemzero(procsnz,size*sizeof(int));
1621     for ( i=0; i<size; i++ ) {
1622       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1623         procsnz[i] += rowlengths[j];
1624       }
1625     }
1626     PetscFree(rowlengths);
1627 
1628     /* determine max buffer needed and allocate it */
1629     maxnz = 0;
1630     for ( i=0; i<size; i++ ) {
1631       maxnz = PetscMax(maxnz,procsnz[i]);
1632     }
1633     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1634 
1635     /* read in my part of the matrix column indices  */
1636     nz = procsnz[0];
1637     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1638     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1639 
1640     /* read in everyone else's column indices and ship them off */
1641     for ( i=1; i<size; i++ ) {
1642       nz = procsnz[i];
1643       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1644       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1645     }
1646     PetscFree(cols);
1647   }
1648   else {
1649     /* determine buffer space needed for message */
1650     nz = 0;
1651     for ( i=0; i<m; i++ ) {
1652       nz += ourlens[i];
1653     }
1654     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1655 
1656     /* receive message of column indices */
1657     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1658     MPI_Get_count(&status,MPI_INT,&maxnz);
1659     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1660   }
1661 
1662   /* loop over local rows, determining number of off-diagonal entries */
1663   PetscMemzero(offlens,m*sizeof(int));
1664   jj = 0;
1665   for ( i=0; i<m; i++ ) {
1666     for ( j=0; j<ourlens[i]; j++ ) {
1667       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1668       jj++;
1669     }
1670   }
1671 
1672   /* create our matrix; ourlens is temporarily reduced to the diagonal-block lengths for preallocation and restored just below */
1673   for ( i=0; i<m; i++ ) {
1674     ourlens[i] -= offlens[i];
1675   }
1676   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1677   A = *newmat;
1678   MatSetOption(A,MAT_COLUMNS_SORTED);
1679   for ( i=0; i<m; i++ ) {
1680     ourlens[i] += offlens[i];
1681   }
1682 
1683   if (!rank) {
1684     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1685 
1686     /* read in my part of the matrix numerical values  */
1687     nz = procsnz[0];
1688     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1689 
1690     /* insert into matrix */
1691     jj      = rstart;
1692     smycols = mycols;
1693     svals   = vals;
1694     for ( i=0; i<m; i++ ) {
1695       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1696       smycols += ourlens[i];
1697       svals   += ourlens[i];
1698       jj++;
1699     }
1700 
1701     /* read in the other processors' values and ship them out */
1702     for ( i=1; i<size; i++ ) {
1703       nz = procsnz[i];
1704       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1705       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1706     }
1707     PetscFree(procsnz);
1708   }
1709   else {
1710     /* receive numeric values */
1711     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1712 
1713     /* receive message of values */
1714     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1715     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1716     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1717 
1718     /* insert into matrix */
1719     jj      = rstart;
1720     smycols = mycols;
1721     svals   = vals;
1722     for ( i=0; i<m; i++ ) {
1723       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1724       smycols += ourlens[i];
1725       svals   += ourlens[i];
1726       jj++;
1727     }
1728   }
1729   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1730 
1731   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1732   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1733   return 0;
1734 }
1735
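/*
   A minimal usage sketch for loading a matrix in this format.  This is an
   illustration under stated assumptions, not code from this file: the file
   name "matrix.dat" is hypothetical, and the viewer is assumed to have been
   opened with ViewerFileOpenBinary() on a file previously written by
   MatView() with a binary viewer; MatLoad() is assumed to dispatch to
   MatLoad_MPIAIJ() when the requested type is MATMPIAIJ.

     Viewer viewer;
     Mat    A;
     int    ierr;

     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
     CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
     ierr = MatDestroy(A); CHKERRQ(ierr);
*/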