xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f5e9677a6d02c22ff01eba66abf996f4c40596e1)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.186 1997/01/14 17:18:18 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way, since the
12 length of colmap equals the global matrix length.
13 */
14 #undef __FUNC__
15 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
19   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
20   int        n = B->n,i;
21 
22   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
23   PLogObjectMemory(mat,aij->N*sizeof(int));
24   PetscMemzero(aij->colmap,aij->N*sizeof(int));
25   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
26   return 0;
27 }
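
/*
   Usage sketch (illustrative, not from the original source): once the colmap
   exists, a global column number gcol is translated to a local column in the
   off-diagonal block aij->B as below; the +1/-1 offset lets 0 mean "this
   column has no entries on this process".

     if (!aij->colmap) {
       ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
     }
     lcol = aij->colmap[gcol] - 1;   (local column in aij->B, or -1 if absent)
*/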
28 
29 extern int DisAssemble_MPIAIJ(Mat);
30 
31 #undef __FUNC__
32 #define __FUNC__ "MatGetRowIJ_MPIAIJ"
33 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
34                            PetscTruth *done)
35 {
36   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
37   int        ierr;
38   if (aij->size == 1) {
39     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
40   } else SETERRQ(1,0,"not supported in parallel");
41   return 0;
42 }
43 
44 #undef __FUNC__
45 #define __FUNC__ "MatRestoreRowIJ_MPIAIJ"
46 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
47                                PetscTruth *done)
48 {
49   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
50   int        ierr;
51   if (aij->size == 1) {
52     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
53   } else SETERRQ(1,0,"not supported in parallel");
54   return 0;
55 }
56 
57 #define CHUNKSIZE   15
58 #define MatSetValues_SeqAIJ_A_Private(A,row,col,value,addv) \
59 { \
60   Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data; \
61   int        *rp,ii,nrow,_i,rmax, N, col1; \
62   int        *imax = a->imax, *ai = a->i, *ailen = a->ilen; \
63   int        *aj = a->j, nonew = a->nonew,shift = a->indexshift; \
64   Scalar     *ap, *aa = a->a; \
65  \
66     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
67     rmax = imax[row]; nrow = ailen[row];  \
68     col1 = col - shift; \
69      \
70       for ( _i=0; _i<nrow; _i++ ) { \
71         if (rp[_i] > col1) break; \
72         if (rp[_i] == col1) { \
73           if (addv == ADD_VALUES) ap[_i] += value;   \
74           else                  ap[_i] = value; \
75           goto _noinsert; \
76         } \
77       }  \
78       if (nonew)  goto _noinsert; \
79       if (nrow >= rmax) { \
80         /* there is no extra room in row, therefore enlarge */ \
81         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
82         Scalar *new_a; \
83  \
84         /* malloc new storage space */ \
85         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
86         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
87         new_j   = (int *) (new_a + new_nz); \
88         new_i   = new_j + new_nz; \
89  \
90         /* copy over old data into new slots */ \
91         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
92         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
93         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
94         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
95         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
96                                                            len*sizeof(int)); \
97         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
98         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
99                                                            len*sizeof(Scalar));  \
100         /* free up old matrix storage */ \
101  \
102         PetscFree(a->a);  \
103         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
104         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
105         a->singlemalloc = 1; \
106  \
107         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
108         rmax = imax[row] = imax[row] + CHUNKSIZE; \
109         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
110         a->maxnz += CHUNKSIZE; \
111         a->reallocs++; \
112       } \
113       N = nrow++ - 1; a->nz++; \
114       /* shift up all the later entries in this row */ \
115       for ( ii=N; ii>=_i; ii-- ) { \
116         rp[ii+1] = rp[ii]; \
117         ap[ii+1] = ap[ii]; \
118       } \
119       rp[_i] = col1;  \
120       ap[_i] = value;  \
121       _noinsert: ; \
122       ailen[row] = nrow; \
123 }
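
/*
   Summary of the macro above (a restatement of its logic, for readability):
   for one (row,col,value) entry in the diagonal block it
     - scans the row (whose column indices are kept sorted) for col;
     - if found, adds to or overwrites the stored value according to addv;
     - if not found and a->nonew is set, drops the value;
     - if not found and the row is full, reallocates a->i/a->j/a->a with
       CHUNKSIZE extra slots for this row, then shifts the tail of the row
       up by one and inserts (col,value) in sorted position.
*/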
124 
125 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
126 #undef __FUNC__
127 #define __FUNC__ "MatSetValues_MPIAIJ"
128 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
129 {
130   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
131   Scalar     value;
132   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
133   int        cstart = aij->cstart, cend = aij->cend,row,col;
134   int        roworiented = aij->roworiented;
135 
136   /* Some Variables required in the macro */
137   /*  Mat        A = aij->A;
138   Mat_SeqAIJ *a = (Mat_SeqAIJ *) (A)->data;
139   int        *rp,ii,nrow,_i,rmax,N,col2;
140   int        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
141   int        *aj=a->j,shift=a->indexshift;
142   Scalar     *ap,*aa=a->a;
143   */
144 #if defined(PETSC_BOPT_g)
145   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
146     SETERRQ(1,0,"Cannot mix inserts and adds");
147   }
148 #endif
149   aij->insertmode = addv;
150   for ( i=0; i<m; i++ ) {
151 #if defined(PETSC_BOPT_g)
152     if (im[i] < 0) SETERRQ(1,0,"Negative row");
153     if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
154 #endif
155     if (im[i] >= rstart && im[i] < rend) {
156       row = im[i] - rstart;
157       for ( j=0; j<n; j++ ) {
158         if (in[j] >= cstart && in[j] < cend){
159           col = in[j] - cstart;
160           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
161           MatSetValues_SeqAIJ_A_Private(aij->A,row,col,value,addv);
162           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
163         }
164 #if defined(PETSC_BOPT_g)
165         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
166         else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
167 #endif
168         else {
169           if (mat->was_assembled) {
170             if (!aij->colmap) {
171               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
172             }
173             col = aij->colmap[in[j]] - 1;
174             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
175               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
176               col =  in[j];
177             }
178           }
179           else col = in[j];
180           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
181           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
182         }
183       }
184     }
185     else {
186       if (roworiented && !aij->donotstash) {
187         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
188       }
189       else {
190         if (!aij->donotstash) {
191           row = im[i];
192           for ( j=0; j<n; j++ ) {
193             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
194           }
195         }
196       }
197     }
198   }
199   return 0;
200 }
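
/*
   Caller-side sketch (assumed typical usage, not part of this file): each
   process usually sets the entries of the rows it owns; entries destined for
   rows owned elsewhere are stashed above and communicated during assembly.
   The loop below sets the diagonal entries of the locally owned rows.

     int    rstart,rend,row,col,ierr;
     Scalar v = 1.0;
     ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       col  = row;
       ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES); CHKERRQ(ierr);
     }

   The MatAssemblyBegin()/MatAssemblyEnd() pair must follow; see the sketch
   after MatAssemblyEnd_MPIAIJ() below.
*/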
201 
202 #undef __FUNC__
203 #define __FUNC__ "MatGetValues_MPIAIJ"
204 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
205 {
206   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
207   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
208   int        cstart = aij->cstart, cend = aij->cend,row,col;
209 
210   for ( i=0; i<m; i++ ) {
211     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
212     if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
213     if (idxm[i] >= rstart && idxm[i] < rend) {
214       row = idxm[i] - rstart;
215       for ( j=0; j<n; j++ ) {
216         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
217         if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
218         if (idxn[j] >= cstart && idxn[j] < cend){
219           col = idxn[j] - cstart;
220           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
221         }
222         else {
223           if (!aij->colmap) {
224             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
225           }
226           col = aij->colmap[idxn[j]] - 1;
227           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
228           else {
229             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
230           }
231         }
232       }
233     }
234     else {
235       SETERRQ(1,0,"Only local values currently supported");
236     }
237   }
238   return 0;
239 }
240 
241 #undef __FUNC__
242 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
243 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
244 {
245   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
246   MPI_Comm    comm = mat->comm;
247   int         size = aij->size, *owners = aij->rowners;
248   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
249   MPI_Request *send_waits,*recv_waits;
250   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
251   InsertMode  addv;
252   Scalar      *rvalues,*svalues;
253 
254   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES) */
255   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
256   if (addv == (ADD_VALUES|INSERT_VALUES)) {
257     SETERRQ(1,0,"Some processors inserted others added");
258   }
259   aij->insertmode = addv; /* in case this processor had no cache */
260 
261   /*  first count number of contributors to each processor */
262   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
263   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
264   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
265   for ( i=0; i<aij->stash.n; i++ ) {
266     idx = aij->stash.idx[i];
267     for ( j=0; j<size; j++ ) {
268       if (idx >= owners[j] && idx < owners[j+1]) {
269         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
270       }
271     }
272   }
273   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
274 
275   /* inform other processors of number of messages and max length*/
276   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
277   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
278   nreceives = work[rank];
279   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
280   nmax = work[rank];
281   PetscFree(work);
282 
283   /* post receives:
284        1) each message will consist of ordered triples
285      (global row index, global column index, value); the indices are
286      stored as Scalars to simplify the message passing.
287        2) since we don't know how long each individual message is we
288      allocate the largest needed buffer for each receive. Potentially
289      this is a lot of wasted space.
290 
291 
292        This could be done better.
293   */
294   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
295   CHKPTRQ(rvalues);
296   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
297   CHKPTRQ(recv_waits);
298   for ( i=0; i<nreceives; i++ ) {
299     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
300               comm,recv_waits+i);
301   }
302 
303   /* do sends:
304       1) starts[i] gives the starting index in svalues for stuff going to
305          the ith processor
306   */
307   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
308   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
309   CHKPTRQ(send_waits);
310   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
311   starts[0] = 0;
312   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
313   for ( i=0; i<aij->stash.n; i++ ) {
314     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
315     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
316     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
317   }
318   PetscFree(owner);
319   starts[0] = 0;
320   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
321   count = 0;
322   for ( i=0; i<size; i++ ) {
323     if (procs[i]) {
324       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
325                 comm,send_waits+count++);
326     }
327   }
328   PetscFree(starts); PetscFree(nprocs);
329 
330   /* Free cache space */
331   PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
332   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
333 
334   aij->svalues    = svalues;    aij->rvalues    = rvalues;
335   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
336   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
337   aij->rmax       = nmax;
338 
339   return 0;
340 }
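
/*
   Layout of one stash message built above (a restatement of the packing
   loop): every stashed entry occupies three consecutive Scalars,

     svalues[3*k]   = (Scalar) global row index
     svalues[3*k+1] = (Scalar) global column index
     svalues[3*k+2] = the numerical value

   so a received message of n Scalars carries n/3 matrix entries, which is
   exactly how MatAssemblyEnd_MPIAIJ() unpacks it.
*/
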
341 extern int MatSetUpMultiply_MPIAIJ(Mat);
342 
343 #undef __FUNC__
344 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
345 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
348   MPI_Status  *send_status,recv_status;
349   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
350   int         row,col,other_disassembled;
351   Scalar      *values,val;
352   InsertMode  addv = aij->insertmode;
353 
354   /*  wait on receives */
355   while (count) {
356     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
357     /* unpack receives into our local space */
358     values = aij->rvalues + 3*imdex*aij->rmax;
359     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
360     n = n/3;
361     for ( i=0; i<n; i++ ) {
362       row = (int) PetscReal(values[3*i]) - aij->rstart;
363       col = (int) PetscReal(values[3*i+1]);
364       val = values[3*i+2];
365       if (col >= aij->cstart && col < aij->cend) {
366         col -= aij->cstart;
367         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
368       }
369       else {
370         if (mat->was_assembled || mat->assembled) {
371           if (!aij->colmap) {
372             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
373           }
374           col = aij->colmap[col] - 1;
375           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
376             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
377             col = (int) PetscReal(values[3*i+1]);
378           }
379         }
380         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
381       }
382     }
383     count--;
384   }
385   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
386 
387   /* wait on sends */
388   if (aij->nsends) {
389     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
390     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
391     PetscFree(send_status);
392   }
393   PetscFree(aij->send_waits); PetscFree(aij->svalues);
394 
395   aij->insertmode = NOT_SET_VALUES;
396   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
398 
399   /* determine if any processor has disassembled, if so we must
400      also disassemble ourselves, in order that we may reassemble. */
401   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
402   if (mat->was_assembled && !other_disassembled) {
403     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
404   }
405 
406   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
407     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
408   }
409   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
410   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
411 
412   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
413   return 0;
414 }
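
/*
   Caller-side sketch (assumed usage, not from this file): assembly is
   collective, so every process must call both routines with the same mode,
   even processes that set no values, before the matrix is used in MatMult()
   and friends.

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/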
415 
416 #undef __FUNC__
417 #define __FUNC__ "MatZeroEntries_MPIAIJ"
418 static int MatZeroEntries_MPIAIJ(Mat A)
419 {
420   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
421   int        ierr;
422   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
424   return 0;
425 }
426 
427 /* the code does not do the diagonal entries correctly unless the
428    matrix is square and the column and row ownerships are identical.
429    This is a BUG. The only way to fix it seems to be to access
430    aij->A and aij->B directly and not through the MatZeroRows()
431    routine.
432 */
433 #undef __FUNC__
434 #define __FUNC__ "MatZeroRows_MPIAIJ"
435 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
436 {
437   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
438   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
439   int            *procs,*nprocs,j,found,idx,nsends,*work;
440   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
441   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
442   int            *lens,imdex,*lrows,*values;
443   MPI_Comm       comm = A->comm;
444   MPI_Request    *send_waits,*recv_waits;
445   MPI_Status     recv_status,*send_status;
446   IS             istmp;
447 
448   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
450 
451   /*  first count number of contributors to each processor */
452   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
453   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
454   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
455   for ( i=0; i<N; i++ ) {
456     idx = rows[i];
457     found = 0;
458     for ( j=0; j<size; j++ ) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
461       }
462     }
463     if (!found) SETERRQ(1,0,"Index out of range");
464   }
465   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
466 
467   /* inform other processors of number of messages and max length*/
468   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
469   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
470   nrecvs = work[rank];
471   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
472   nmax = work[rank];
473   PetscFree(work);
474 
475   /* post receives:   */
476   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
477   CHKPTRQ(rvalues);
478   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
479   CHKPTRQ(recv_waits);
480   for ( i=0; i<nrecvs; i++ ) {
481     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
482   }
483 
484   /* do sends:
485       1) starts[i] gives the starting index in svalues for stuff going to
486          the ith processor
487   */
488   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
489   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
490   CHKPTRQ(send_waits);
491   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
492   starts[0] = 0;
493   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
494   for ( i=0; i<N; i++ ) {
495     svalues[starts[owner[i]]++] = rows[i];
496   }
497   ISRestoreIndices(is,&rows);
498 
499   starts[0] = 0;
500   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
501   count = 0;
502   for ( i=0; i<size; i++ ) {
503     if (procs[i]) {
504       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
505     }
506   }
507   PetscFree(starts);
508 
509   base = owners[rank];
510 
511   /*  wait on receives */
512   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
513   source = lens + nrecvs;
514   count  = nrecvs; slen = 0;
515   while (count) {
516     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
517     /* unpack receives into our local space */
518     MPI_Get_count(&recv_status,MPI_INT,&n);
519     source[imdex]  = recv_status.MPI_SOURCE;
520     lens[imdex]  = n;
521     slen += n;
522     count--;
523   }
524   PetscFree(recv_waits);
525 
526   /* move the data into the send scatter */
527   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
528   count = 0;
529   for ( i=0; i<nrecvs; i++ ) {
530     values = rvalues + i*nmax;
531     for ( j=0; j<lens[i]; j++ ) {
532       lrows[count++] = values[j] - base;
533     }
534   }
535   PetscFree(rvalues); PetscFree(lens);
536   PetscFree(owner); PetscFree(nprocs);
537 
538   /* actually zap the local rows */
539   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
540   PLogObjectParent(A,istmp);
541   PetscFree(lrows);
542   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
543   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
544   ierr = ISDestroy(istmp); CHKERRQ(ierr);
545 
546   /* wait on sends */
547   if (nsends) {
548     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
549     CHKPTRQ(send_status);
550     MPI_Waitall(nsends,send_waits,send_status);
551     PetscFree(send_status);
552   }
553   PetscFree(send_waits); PetscFree(svalues);
554 
555   return 0;
556 }
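
/*
   Usage sketch (an assumed typical call sequence, not from this file; using
   the matrix communicator for the index set is an assumption): zero the
   listed global rows, putting the given value on the diagonal of every
   zeroed row owned by this process.

     IS     is;
     int    rows[3] = {0, 5, 9},ierr;
     Scalar one = 1.0;
     ierr = ISCreateGeneral(A->comm,3,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/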
557 
558 #undef __FUNC__
559 #define __FUNC__ "MatMult_MPIAIJ"
560 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
561 {
562   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
563   int        ierr,nt;
564 
565   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
566   if (nt != a->n) {
567     SETERRQ(1,0,"Incompatible partition of A and xx");
568   }
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
570   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
572   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
573   return 0;
574 }
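
/*
   What MatMult_MPIAIJ() above computes (a restatement of the code): with the
   matrix split into the diagonal block a->A and off-diagonal block a->B, and
   with a->lvec holding the scattered ghost entries of xx,

     yy = a->A * xx  +  a->B * a->lvec

   so the purely local product overlaps with the communication of the ghost
   values.
*/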
575 
576 #undef __FUNC__
577 #define __FUNC__ "MatMultAdd_MPIAIJ"
578 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
579 {
580   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
581   int        ierr;
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
586   return 0;
587 }
588 
589 #undef __FUNC__
590 #define __FUNC__ "MatMultTrans_MPIAIJ"
591 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
594   int        ierr;
595 
596   /* do nondiagonal part */
597   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
598   /* send it on its way */
599   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
600   /* do local part */
601   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
602   /* receive remote parts: note this assumes the values are not actually */
603   /* inserted in yy until the next line, which is true for my implementation*/
604   /* but may not always be true. */
605   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
606   return 0;
607 }
608 
609 #undef __FUNC__
610 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
611 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
614   int        ierr;
615 
616   /* do nondiagonal part */
617   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually */
623   /* inserted in zz until the next line, which is true for my implementation*/
624   /* but may not always be true. */
625   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
626   return 0;
627 }
628 
629 /*
630   This only works correctly for square matrices where the subblock A->A is the
631    diagonal block
632 */
633 #undef __FUNC__
634 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
635 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
638   if (a->M != a->N)
639     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
640   if (a->rstart != a->cstart || a->rend != a->cend) {
641     SETERRQ(1,0,"row partition must equal col partition");  }
642   return MatGetDiagonal(a->A,v);
643 }
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatScale_MPIAIJ"
647 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
648 {
649   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
650   int        ierr;
651   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
652   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
653   return 0;
654 }
655 
656 #undef __FUNC__
657 #define __FUNC__ "MatDestroy_MPIAIJ"
658 static int MatDestroy_MPIAIJ(PetscObject obj)
659 {
660   Mat        mat = (Mat) obj;
661   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
662   int        ierr;
663 #if defined(PETSC_LOG)
664   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
665 #endif
666   PetscFree(aij->rowners);
667   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
668   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
669   if (aij->colmap) PetscFree(aij->colmap);
670   if (aij->garray) PetscFree(aij->garray);
671   if (aij->lvec)   VecDestroy(aij->lvec);
672   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
673   if (aij->rowvalues) PetscFree(aij->rowvalues);
674   PetscFree(aij);
675   if (mat->mapping) {
676     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
677   }
678   PLogObjectDestroy(mat);
679   PetscHeaderDestroy(mat);
680   return 0;
681 }
682 #include "draw.h"
683 #include "pinclude/pviewer.h"
684 
685 #undef __FUNC__
686 #define __FUNC__ "MatView_MPIAIJ_Binary"
687 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
688 {
689   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
690   int         ierr;
691 
692   if (aij->size == 1) {
693     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
694   }
695   else SETERRQ(1,0,"Only uniprocessor output supported");
696   return 0;
697 }
698 
699 #undef __FUNC__
700 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
701 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
702 {
703   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
704   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
705   int         ierr, format,shift = C->indexshift,rank;
706   FILE        *fd;
707   ViewerType  vtype;
708 
709   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
710   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
711     ierr = ViewerGetFormat(viewer,&format);
712     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
713       MatInfo info;
714       int     flg;
715       MPI_Comm_rank(mat->comm,&rank);
716       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
717       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
718       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
719       PetscSequentialPhaseBegin(mat->comm,1);
720       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
721          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
722       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
723          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
724       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
725       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
726       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
727       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
728       fflush(fd);
729       PetscSequentialPhaseEnd(mat->comm,1);
730       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
731       return 0;
732     }
733     else if (format == VIEWER_FORMAT_ASCII_INFO) {
734       return 0;
735     }
736   }
737 
738   if (vtype == DRAW_VIEWER) {
739     Draw       draw;
740     PetscTruth isnull;
741     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
742     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
743   }
744 
745   if (vtype == ASCII_FILE_VIEWER) {
746     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
747     PetscSequentialPhaseBegin(mat->comm,1);
748     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
749            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
750            aij->cend);
751     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
752     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
753     fflush(fd);
754     PetscSequentialPhaseEnd(mat->comm,1);
755   }
756   else {
757     int size = aij->size;
758     rank = aij->rank;
759     if (size == 1) {
760       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
761     }
762     else {
763       /* assemble the entire matrix onto first processor. */
764       Mat         A;
765       Mat_SeqAIJ *Aloc;
766       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
767       Scalar      *a;
768 
769       if (!rank) {
770         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
771                CHKERRQ(ierr);
772       }
773       else {
774         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
775                CHKERRQ(ierr);
776       }
777       PLogObjectParent(mat,A);
778 
779       /* copy over the A part */
780       Aloc = (Mat_SeqAIJ*) aij->A->data;
781       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
782       row = aij->rstart;
783       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
784       for ( i=0; i<m; i++ ) {
785         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
786         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
787       }
788       aj = Aloc->j;
789       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
790 
791       /* copy over the B part */
792       Aloc = (Mat_SeqAIJ*) aij->B->data;
793       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
794       row = aij->rstart;
795       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
796       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
797       for ( i=0; i<m; i++ ) {
798         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
799         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
800       }
801       PetscFree(ct);
802       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
803       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
804       if (!rank) {
805         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
806       }
807       ierr = MatDestroy(A); CHKERRQ(ierr);
808     }
809   }
810   return 0;
811 }
812 
813 #undef __FUNC__
814 #define __FUNC__ "MatView_MPIAIJ"
815 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
816 {
817   Mat         mat = (Mat) obj;
818   int         ierr;
819   ViewerType  vtype;
820 
821   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
822   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
823       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
824     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
825   }
826   else if (vtype == BINARY_FILE_VIEWER) {
827     return MatView_MPIAIJ_Binary(mat,viewer);
828   }
829   return 0;
830 }
831 
832 /*
833     This has to provide several versions.
834 
835      2) a) use only local smoothing, updating outer values only once.
836         b) local smoothing, updating outer values each inner iteration.
837      3) colored updating, exchanging outer values between colors.
838 */
839 #undef __FUNC__
840 #define __FUNC__ "MatRelax_MPIAIJ"
841 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
842                            double fshift,int its,Vec xx)
843 {
844   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
845   Mat        AA = mat->A, BB = mat->B;
846   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
847   Scalar     *b,*x,*xs,*ls,d,*v,sum;
848   int        ierr,*idx, *diag;
849   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
850 
851   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
852   xs = x + shift; /* shift by one for index start of 1 */
853   ls = ls + shift;
854   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
855   diag = A->diag;
856   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
857     if (flag & SOR_ZERO_INITIAL_GUESS) {
858       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
859     }
860     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
861     CHKERRQ(ierr);
862     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
863     CHKERRQ(ierr);
864     while (its--) {
865       /* go down through the rows */
866       for ( i=0; i<m; i++ ) {
867         n    = A->i[i+1] - A->i[i];
868         idx  = A->j + A->i[i] + shift;
869         v    = A->a + A->i[i] + shift;
870         sum  = b[i];
871         SPARSEDENSEMDOT(sum,xs,v,idx,n);
872         d    = fshift + A->a[diag[i]+shift];
873         n    = B->i[i+1] - B->i[i];
874         idx  = B->j + B->i[i] + shift;
875         v    = B->a + B->i[i] + shift;
876         SPARSEDENSEMDOT(sum,ls,v,idx,n);
877         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
878       }
879       /* come up through the rows */
880       for ( i=m-1; i>-1; i-- ) {
881         n    = A->i[i+1] - A->i[i];
882         idx  = A->j + A->i[i] + shift;
883         v    = A->a + A->i[i] + shift;
884         sum  = b[i];
885         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886         d    = fshift + A->a[diag[i]+shift];
887         n    = B->i[i+1] - B->i[i];
888         idx  = B->j + B->i[i] + shift;
889         v    = B->a + B->i[i] + shift;
890         SPARSEDENSEMDOT(sum,ls,v,idx,n);
891         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
892       }
893     }
894   }
895   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
896     if (flag & SOR_ZERO_INITIAL_GUESS) {
897       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
898     }
899     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
900     CHKERRQ(ierr);
901     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
902     CHKERRQ(ierr);
903     while (its--) {
904       for ( i=0; i<m; i++ ) {
905         n    = A->i[i+1] - A->i[i];
906         idx  = A->j + A->i[i] + shift;
907         v    = A->a + A->i[i] + shift;
908         sum  = b[i];
909         SPARSEDENSEMDOT(sum,xs,v,idx,n);
910         d    = fshift + A->a[diag[i]+shift];
911         n    = B->i[i+1] - B->i[i];
912         idx  = B->j + B->i[i] + shift;
913         v    = B->a + B->i[i] + shift;
914         SPARSEDENSEMDOT(sum,ls,v,idx,n);
915         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
916       }
917     }
918   }
919   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
920     if (flag & SOR_ZERO_INITIAL_GUESS) {
921       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
922     }
923     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
924                             mat->Mvctx); CHKERRQ(ierr);
925     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
926                             mat->Mvctx); CHKERRQ(ierr);
927     while (its--) {
928       for ( i=m-1; i>-1; i-- ) {
929         n    = A->i[i+1] - A->i[i];
930         idx  = A->j + A->i[i] + shift;
931         v    = A->a + A->i[i] + shift;
932         sum  = b[i];
933         SPARSEDENSEMDOT(sum,xs,v,idx,n);
934         d    = fshift + A->a[diag[i]+shift];
935         n    = B->i[i+1] - B->i[i];
936         idx  = B->j + B->i[i] + shift;
937         v    = B->a + B->i[i] + shift;
938         SPARSEDENSEMDOT(sum,ls,v,idx,n);
939         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
940       }
941     }
942   }
943   else {
944     SETERRQ(1,0,"Option not supported");
945   }
946   return 0;
947 }
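
/*
   The update applied in each local sweep above (a restatement of the code,
   assuming SPARSEDENSEMDOT() subtracts the sparse dot product from sum, the
   usual PETSc convention): with d = fshift + A_ii and

     sum = b_i - sum_j A_ij x_j - sum_j B_ij xghost_j

   each visited row performs

     x_i <- (1 - omega) x_i + omega (sum + A_ii x_i) / d

   which is standard SOR restricted to the local rows when fshift is zero.
*/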
948 
949 #undef __FUNC__
950 #define __FUNC__ "MatGetInfo_MPIAIJ"
951 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
952 {
953   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
954   Mat        A = mat->A, B = mat->B;
955   int        ierr;
956   double     isend[5], irecv[5];
957 
958   info->rows_global    = (double)mat->M;
959   info->columns_global = (double)mat->N;
960   info->rows_local     = (double)mat->m;
961   info->columns_local  = (double)mat->N;
962   info->block_size     = 1.0;
963   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
964   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
965   isend[3] = info->memory;  isend[4] = info->mallocs;
966   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
967   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
968   isend[3] += info->memory;  isend[4] += info->mallocs;
969   if (flag == MAT_LOCAL) {
970     info->nz_used      = isend[0];
971     info->nz_allocated = isend[1];
972     info->nz_unneeded  = isend[2];
973     info->memory       = isend[3];
974     info->mallocs      = isend[4];
975   } else if (flag == MAT_GLOBAL_MAX) {
976     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
977     info->nz_used      = irecv[0];
978     info->nz_allocated = irecv[1];
979     info->nz_unneeded  = irecv[2];
980     info->memory       = irecv[3];
981     info->mallocs      = irecv[4];
982   } else if (flag == MAT_GLOBAL_SUM) {
983     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
984     info->nz_used      = irecv[0];
985     info->nz_allocated = irecv[1];
986     info->nz_unneeded  = irecv[2];
987     info->memory       = irecv[3];
988     info->mallocs      = irecv[4];
989   }
990   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
991   info->fill_ratio_needed = 0;
992   info->factor_mallocs    = 0;
993 
994   return 0;
995 }
996 
997 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
998 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
999 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1000 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1001 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1002 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1003 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1004 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1005 
1006 #undef __FUNC__
1007 #define __FUNC__ "MatSetOption_MPIAIJ"
1008 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1009 {
1010   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1011 
1012   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1013       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1014       op == MAT_COLUMNS_UNSORTED ||
1015       op == MAT_COLUMNS_SORTED) {
1016         MatSetOption(a->A,op);
1017         MatSetOption(a->B,op);
1018   } else if (op == MAT_ROW_ORIENTED) {
1019     a->roworiented = 1;
1020     MatSetOption(a->A,op);
1021     MatSetOption(a->B,op);
1022   } else if (op == MAT_ROWS_SORTED ||
1023              op == MAT_ROWS_UNSORTED ||
1024              op == MAT_SYMMETRIC ||
1025              op == MAT_STRUCTURALLY_SYMMETRIC ||
1026              op == MAT_YES_NEW_DIAGONALS)
1027     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1028   else if (op == MAT_COLUMN_ORIENTED) {
1029     a->roworiented = 0;
1030     MatSetOption(a->A,op);
1031     MatSetOption(a->B,op);
1032   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
1033     a->donotstash = 1;
1034   } else if (op == MAT_NO_NEW_DIAGONALS)
1035     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1036   else
1037     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1038   return 0;
1039 }
1040 
1041 #undef __FUNC__
1042 #define __FUNC__ "MatGetSize_MPIAIJ"
1043 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1044 {
1045   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1046   *m = mat->M; *n = mat->N;
1047   return 0;
1048 }
1049 
1050 #undef __FUNC__
1051 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1052 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1053 {
1054   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1055   *m = mat->m; *n = mat->N;
1056   return 0;
1057 }
1058 
1059 #undef __FUNC__
1060 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1061 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1062 {
1063   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1064   *m = mat->rstart; *n = mat->rend;
1065   return 0;
1066 }
1067 
1068 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1069 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1070 
1071 #undef __FUNC__
1072 #define __FUNC__ "MatGetRow_MPIAIJ"
1073 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1074 {
1075   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1076   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1077   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1078   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1079   int        *cmap, *idx_p;
1080 
1081   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1082   mat->getrowactive = PETSC_TRUE;
1083 
1084   if (!mat->rowvalues && (idx || v)) {
1085     /*
1086         allocate enough space to hold information from the longest row.
1087     */
1088     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1089     int     max = 1,m = mat->m,tmp;
1090     for ( i=0; i<m; i++ ) {
1091       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1092       if (max < tmp) { max = tmp; }
1093     }
1094     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1095     CHKPTRQ(mat->rowvalues);
1096     mat->rowindices = (int *) (mat->rowvalues + max);
1097   }
1098 
1099   if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows");
1100   lrow = row - rstart;
1101 
1102   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1103   if (!v)   {pvA = 0; pvB = 0;}
1104   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1105   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1106   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1107   nztot = nzA + nzB;
1108 
1109   cmap  = mat->garray;
1110   if (v  || idx) {
1111     if (nztot) {
1112       /* Sort by increasing column numbers, assuming A and B already sorted */
1113       int imark = -1;
1114       if (v) {
1115         *v = v_p = mat->rowvalues;
1116         for ( i=0; i<nzB; i++ ) {
1117           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1118           else break;
1119         }
1120         imark = i;
1121         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1122         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1123       }
1124       if (idx) {
1125         *idx = idx_p = mat->rowindices;
1126         if (imark > -1) {
1127           for ( i=0; i<imark; i++ ) {
1128             idx_p[i] = cmap[cworkB[i]];
1129           }
1130         } else {
1131           for ( i=0; i<nzB; i++ ) {
1132             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1133             else break;
1134           }
1135           imark = i;
1136         }
1137         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1138         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1139       }
1140     }
1141     else {
1142       if (idx) *idx = 0;
1143       if (v)   *v   = 0;
1144     }
1145   }
1146   *nz = nztot;
1147   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1148   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1149   return 0;
1150 }
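
/*
   Usage sketch (assumed typical caller code, not from this file): only rows
   owned by this process may be requested, and each MatGetRow() must be
   matched by a MatRestoreRow() before the next row is fetched.

     int    rstart,rend,row,nz,*cols,ierr;
     Scalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
     for ( row=rstart; row<rend; row++ ) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... examine nz, cols[0..nz-1] and vals[0..nz-1] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
     }
*/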
1151 
1152 #undef __FUNC__
1153 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1154 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1155 {
1156   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1157   if (aij->getrowactive == PETSC_FALSE) {
1158     SETERRQ(1,0,"MatGetRow not called");
1159   }
1160   aij->getrowactive = PETSC_FALSE;
1161   return 0;
1162 }
1163 
1164 #undef __FUNC__
1165 #define __FUNC__ "MatNorm_MPIAIJ"
1166 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1167 {
1168   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1169   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1170   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1171   double     sum = 0.0;
1172   Scalar     *v;
1173 
1174   if (aij->size == 1) {
1175     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1176   } else {
1177     if (type == NORM_FROBENIUS) {
1178       v = amat->a;
1179       for (i=0; i<amat->nz; i++ ) {
1180 #if defined(PETSC_COMPLEX)
1181         sum += real(conj(*v)*(*v)); v++;
1182 #else
1183         sum += (*v)*(*v); v++;
1184 #endif
1185       }
1186       v = bmat->a;
1187       for (i=0; i<bmat->nz; i++ ) {
1188 #if defined(PETSC_COMPLEX)
1189         sum += real(conj(*v)*(*v)); v++;
1190 #else
1191         sum += (*v)*(*v); v++;
1192 #endif
1193       }
1194       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1195       *norm = sqrt(*norm);
1196     }
1197     else if (type == NORM_1) { /* max column norm */
1198       double *tmp, *tmp2;
1199       int    *jj, *garray = aij->garray;
1200       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1201       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1202       PetscMemzero(tmp,aij->N*sizeof(double));
1203       *norm = 0.0;
1204       v = amat->a; jj = amat->j;
1205       for ( j=0; j<amat->nz; j++ ) {
1206         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1207       }
1208       v = bmat->a; jj = bmat->j;
1209       for ( j=0; j<bmat->nz; j++ ) {
1210         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1211       }
1212       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1213       for ( j=0; j<aij->N; j++ ) {
1214         if (tmp2[j] > *norm) *norm = tmp2[j];
1215       }
1216       PetscFree(tmp); PetscFree(tmp2);
1217     }
1218     else if (type == NORM_INFINITY) { /* max row norm */
1219       double ntemp = 0.0;
1220       for ( j=0; j<amat->m; j++ ) {
1221         v = amat->a + amat->i[j] + shift;
1222         sum = 0.0;
1223         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1224           sum += PetscAbsScalar(*v); v++;
1225         }
1226         v = bmat->a + bmat->i[j] + shift;
1227         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1228           sum += PetscAbsScalar(*v); v++;
1229         }
1230         if (sum > ntemp) ntemp = sum;
1231       }
1232       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1233     }
1234     else {
1235       SETERRQ(1,0,"No support for two norm");
1236     }
1237   }
1238   return 0;
1239 }
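
/*
   The norms computed above (a restatement of the code), taking the nonzeros
   a_ij from both the diagonal and the off-diagonal blocks:

     NORM_FROBENIUS : sqrt( sum_ij |a_ij|^2 )   (summed across processes)
     NORM_1         : max_j sum_i |a_ij|        (largest column sum)
     NORM_INFINITY  : max_i sum_j |a_ij|        (largest row sum)
*/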
1240 
1241 #undef __FUNC__
1242 #define __FUNC__ "MatTranspose_MPIAIJ"
1243 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1244 {
1245   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1246   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1247   int        ierr,shift = Aloc->indexshift;
1248   Mat        B;
1249   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1250   Scalar     *array;
1251 
1252   if (matout == PETSC_NULL && M != N)
1253     SETERRQ(1,0,"Square matrix only for in-place");
1254   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1255          PETSC_NULL,&B); CHKERRQ(ierr);
1256 
1257   /* copy over the A part */
1258   Aloc = (Mat_SeqAIJ*) a->A->data;
1259   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1260   row = a->rstart;
1261   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1262   for ( i=0; i<m; i++ ) {
1263     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1264     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1265   }
1266   aj = Aloc->j;
1267   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1268 
1269   /* copy over the B part */
1270   Aloc = (Mat_SeqAIJ*) a->B->data;
1271   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1272   row = a->rstart;
1273   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1274   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1275   for ( i=0; i<m; i++ ) {
1276     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1277     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1278   }
1279   PetscFree(ct);
1280   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1281   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1282   if (matout != PETSC_NULL) {
1283     *matout = B;
1284   } else {
1285     /* This isn't really an in-place transpose: free a's data structures and overwrite A with B */
1286     PetscFree(a->rowners);
1287     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1288     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1289     if (a->colmap) PetscFree(a->colmap);
1290     if (a->garray) PetscFree(a->garray);
1291     if (a->lvec) VecDestroy(a->lvec);
1292     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1293     PetscFree(a);
1294     PetscMemcpy(A,B,sizeof(struct _Mat));
1295     PetscHeaderDestroy(B);
1296   }
1297   return 0;
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1302 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1303 {
1304   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1305   Mat a = aij->A, b = aij->B;
1306   int ierr,s1,s2,s3;
1307 
1308   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1309   if (rr) {
1310     s3 = aij->n;
1311     VecGetLocalSize_Fast(rr,s1);
1312     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1313     /* Overlap communication with computation. */
1314     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1315   }
1316   if (ll) {
1317     VecGetLocalSize_Fast(ll,s1);
1318     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1319     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1320   }
1321   /* scale  the diagonal block */
1322   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1323 
1324   if (rr) {
1325     /* Do a scatter end and then right scale the off-diagonal block */
1326     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1327     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1328   }
1329 
1330   return 0;
1331 }
1332 
1333 
1334 extern int MatPrintHelp_SeqAIJ(Mat);
1335 #undef __FUNC__
1336 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1337 static int MatPrintHelp_MPIAIJ(Mat A)
1338 {
1339   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1340 
1341   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1342   else return 0;
1343 }
1344 
1345 #undef __FUNC__
1346 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1347 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1348 {
1349   *bs = 1;
1350   return 0;
1351 }
1352 #undef __FUNC__
1353 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1354 static int MatSetUnfactored_MPIAIJ(Mat A)
1355 {
1356   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1357   int        ierr;
1358   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1359   return 0;
1360 }
1361 
1362 
1363 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1364 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1365 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1366 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1367 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1368 /* -------------------------------------------------------------------*/
1369 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1370        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1371        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1372        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1373        0,0,
1374        0,0,
1375        0,0,
1376        MatRelax_MPIAIJ,
1377        MatTranspose_MPIAIJ,
1378        MatGetInfo_MPIAIJ,0,
1379        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1380        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1381        0,
1382        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1383        0,0,0,0,
1384        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1385        0,0,
1386        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1387        0,0,0,
1388        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1389        MatPrintHelp_MPIAIJ,
1390        MatScale_MPIAIJ,0,0,0,
1391        MatGetBlockSize_MPIAIJ,0,0,0,0,
1392        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1393 
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatCreateMPIAIJ"
1397 /*@C
1398    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1399    (the default parallel PETSc format).  For good matrix assembly performance
1400    the user should preallocate the matrix storage by setting the parameters
1401    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1402    performance can be increased by more than a factor of 50.
1403 
1404    Input Parameters:
1405 .  comm - MPI communicator
1406 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1407            This value should be the same as the local size used in creating the
1408            y vector for the matrix-vector product y = Ax.
1409 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1410            This value should be the same as the local size used in creating the
1411            x vector for the matrix-vector product y = Ax.
1412 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1413 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1414 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1415            (same for all local rows)
1416 .  d_nnz - array containing the number of nonzeros in the various rows of the
1417            diagonal portion of the local submatrix (possibly different for each row)
1418            or PETSC_NULL. You must leave room for the diagonal entry even if
1419            it is zero.
1420 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1421            submatrix (same for all local rows).
1422 .  o_nnz - array containing the number of nonzeros in the various rows of the
1423            off-diagonal portion of the local submatrix (possibly different for
1424            each row) or PETSC_NULL.
1425 
1426    Output Parameter:
1427 .  A - the matrix
1428 
1429    Notes:
1430    The AIJ format (also called the Yale sparse matrix format or
1431    compressed row storage), is fully compatible with standard Fortran 77
1432    storage.  That is, the stored row and column indices can begin at
1433    either one (as in Fortran) or zero.  See the users manual for details.
1434 
1435    The user MUST specify either the local or global matrix dimensions
1436    (possibly both).
1437 
1438    By default, this format uses inodes (identical nodes) when possible.
1439    We search for consecutive rows with the same nonzero structure, thereby
1440    reusing matrix information to achieve increased efficiency.
1441 
1442    Options Database Keys:
1443 $    -mat_aij_no_inode  - Do not use inodes
1444 $    -mat_aij_inode_limit <limit> - Set inode limit.
1445 $        (max limit=5)
1446 $    -mat_aij_oneindex - Internally use indexing starting at 1
1447 $        rather than 0.  Note: When calling MatSetValues(),
1448 $        the user still MUST index entries starting at 0!
1449 
1450    Storage Information:
1451    For a square global matrix we define each processor's diagonal portion
1452    to be its local rows and the corresponding columns (a square submatrix);
1453    each processor's off-diagonal portion encompasses the remainder of the
1454    local matrix (a rectangular submatrix).
1455 
1456    The user can specify preallocated storage for the diagonal part of
1457    the local submatrix with either d_nz or d_nnz (not both).  Set
1458    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1459    memory allocation.  Likewise, specify preallocated storage for the
1460    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1461 
1462    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1463    the figure below we depict these three local rows and all columns (0-11).
1464 
1465 $             0 1 2 3 4 5 6 7 8 9 10 11
1466 $            ---------------------------
1467 $  row 3  |  o o o d d d o o o o o o
1468 $  row 4  |  o o o d d d o o o o o o
1469 $  row 5  |  o o o d d d o o o o o o
1470 $            ---------------------------
1471 $
1472 
1473    Thus, any entries in the d locations are stored in the d (diagonal)
1474    submatrix, and any entries in the o locations are stored in the
1475    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1476    stored simply in the MATSEQAIJ format for compressed row storage.
1477 
1478    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1479    and o_nz should indicate the number of nonzeros per row in the o matrix.
1480    In general, for PDE problems in which most nonzeros are near the diagonal,
1481    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1482    or you will get TERRIBLE performance; see the users' manual chapter on
1483    matrices.
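
   As an illustrative sketch only (the numbers below are hypothetical and not
   taken from any particular application), the processor depicted in the figure
   above, which owns 3 local rows and 3 local columns, could preallocate its
   storage with
$
$      ierr = MatCreateMPIAIJ(comm,3,3,PETSC_DECIDE,PETSC_DECIDE,
$                             3,PETSC_NULL,9,PETSC_NULL,&A); CHKERRQ(ierr);
$
   since each of its rows has 3 entries in the d submatrix and 9 entries in the
   o submatrix.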
1484 
1485 .keywords: matrix, aij, compressed row, sparse, parallel
1486 
1487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1488 @*/
1489 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1490                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1491 {
1492   Mat          B;
1493   Mat_MPIAIJ   *b;
1494   int          ierr, i,sum[2],work[2],size;
1495 
1496   *A = 0;
1497   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1498   PLogObjectCreate(B);
1499   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1500   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1501   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1502   MPI_Comm_size(comm,&size);
1503   if (size == 1) {
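    /* on a single processor the factorization and triangular solve operations
       are available, so hook them up; in parallel they remain unsupported */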
1504     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1505     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1506     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1507     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1508     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1509     B->ops.solve             = MatSolve_MPIAIJ;
1510     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1511     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1512     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1513     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1514   }
1515   B->destroy    = MatDestroy_MPIAIJ;
1516   B->view       = MatView_MPIAIJ;
1517   B->factor     = 0;
1518   B->assembled  = PETSC_FALSE;
1519   B->mapping    = 0;
1520 
1521   b->insertmode = NOT_SET_VALUES;
1522   MPI_Comm_rank(comm,&b->rank);
1523   MPI_Comm_size(comm,&b->size);
1524 
1525   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1526     SETERRQ(1,0,"Cannot set d_nnz or o_nnz when m is PETSC_DECIDE");
1527 
1528   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1529     work[0] = m; work[1] = n;
1530     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1531     if (M == PETSC_DECIDE) M = sum[0];
1532     if (N == PETSC_DECIDE) N = sum[1];
1533   }
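  /* default split: M/size rows (or N/size columns) each, with the first
     M%size (or N%size) processors taking one extra */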
1534   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1535   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1536   b->m = m; B->m = m;
1537   b->n = n; B->n = n;
1538   b->N = N; B->N = N;
1539   b->M = M; B->M = M;
1540 
1541   /* build local table of row and column ownerships */
1542   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1543   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1544   b->cowners = b->rowners + b->size + 2;
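  /* gather every processor's local row count into rowners[1..size], then turn the
     counts into a prefix sum so that rowners[rank] is the first global row owned
     here and rowners[size] is the global number of rows; cowners is built the
     same way from the local column counts */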
1545   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1546   b->rowners[0] = 0;
1547   for ( i=2; i<=b->size; i++ ) {
1548     b->rowners[i] += b->rowners[i-1];
1549   }
1550   b->rstart = b->rowners[b->rank];
1551   b->rend   = b->rowners[b->rank+1];
1552   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1553   b->cowners[0] = 0;
1554   for ( i=2; i<=b->size; i++ ) {
1555     b->cowners[i] += b->cowners[i-1];
1556   }
1557   b->cstart = b->cowners[b->rank];
1558   b->cend   = b->cowners[b->rank+1];
1559 
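  /* create the sequential blocks: b->A holds the diagonal portion (m x n) and
     b->B the off-diagonal portion, created here with the full global column count N */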
1560   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1561   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1562   PLogObjectParent(B,b->A);
1563   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1564   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1565   PLogObjectParent(B,b->B);
1566 
1567   /* build cache (stash) for matrix entries destined for other processors */
1568   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1569   b->donotstash  = 0;
1570   b->colmap      = 0;
1571   b->garray      = 0;
1572   b->roworiented = 1;
1573 
1574   /* stuff used for the matrix-vector multiply */
1575   b->lvec      = 0;
1576   b->Mvctx     = 0;
1577 
1578   /* stuff for MatGetRow() */
1579   b->rowindices   = 0;
1580   b->rowvalues    = 0;
1581   b->getrowactive = PETSC_FALSE;
1582 
1583   *A = B;
1584   return 0;
1585 }
1586 
1587 #undef __FUNC__
1588 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1589 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1590 {
1591   Mat        mat;
1592   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1593   int        ierr, len=0, flg;
1594 
1595   *newmat       = 0;
1596   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1597   PLogObjectCreate(mat);
1598   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1599   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1600   mat->destroy    = MatDestroy_MPIAIJ;
1601   mat->view       = MatView_MPIAIJ;
1602   mat->factor     = matin->factor;
1603   mat->assembled  = PETSC_TRUE;
1604 
1605   a->m = mat->m   = oldmat->m;
1606   a->n = mat->n   = oldmat->n;
1607   a->M = mat->M   = oldmat->M;
1608   a->N = mat->N   = oldmat->N;
1609 
1610   a->rstart       = oldmat->rstart;
1611   a->rend         = oldmat->rend;
1612   a->cstart       = oldmat->cstart;
1613   a->cend         = oldmat->cend;
1614   a->size         = oldmat->size;
1615   a->rank         = oldmat->rank;
1616   a->insertmode   = NOT_SET_VALUES;
1617   a->rowvalues    = 0;
1618   a->getrowactive = PETSC_FALSE;
1619 
1620   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1621   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1622   a->cowners = a->rowners + a->size + 2;
1623   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1624   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1625   if (oldmat->colmap) {
1626     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1627     PLogObjectMemory(mat,(a->N)*sizeof(int));
1628     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1629   } else a->colmap = 0;
1630   if (oldmat->garray) {
1631     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1632     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1633     PLogObjectMemory(mat,len*sizeof(int));
1634     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1635   } else a->garray = 0;
1636 
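  /* duplicate the vector and scatter context used in the matrix-vector multiply,
     and copy the sequential A and B blocks from the old matrix */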
1637   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1638   PLogObjectParent(mat,a->lvec);
1639   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1640   PLogObjectParent(mat,a->Mvctx);
1641   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1642   PLogObjectParent(mat,a->A);
1643   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1644   PLogObjectParent(mat,a->B);
1645   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1646   if (flg) {
1647     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1648   }
1649   *newmat = mat;
1650   return 0;
1651 }
1652 
1653 #include "sys.h"
1654 
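/* MatLoad_MPIAIJ reads a matrix from a PETSc binary viewer.  As the code below
   assumes, the file contains a 4-int header whose first entry must be MAT_COOKIE
   and whose next two entries are the global row and column counts M and N (the
   fourth header entry is not used by this routine); then the M row lengths; then
   all column indices, row by row; then all numerical values, row by row.
   Processor 0 does all of the reading and ships each other processor its rows. */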
1655 #undef __FUNC__
1656 #define __FUNC__ "MatLoad_MPIAIJ"
1657 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1658 {
1659   Mat          A;
1660   int          i, nz, ierr, j,rstart, rend, fd;
1661   Scalar       *vals,*svals;
1662   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1663   MPI_Status   status;
1664   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1665   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1666   int          tag = ((PetscObject)viewer)->tag;
1667 
1668   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1669   if (!rank) {
1670     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1671     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1672     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not a matrix object");
1673   }
1674 
1675   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1676   M = header[1]; N = header[2];
1677   /* determine ownership of all rows */
1678   m = M/size + ((M % size) > rank);
1679   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1680   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1681   rowners[0] = 0;
1682   for ( i=2; i<=size; i++ ) {
1683     rowners[i] += rowners[i-1];
1684   }
1685   rstart = rowners[rank];
1686   rend   = rowners[rank+1];
1687 
1688   /* distribute row lengths to all processors */
1689   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1690   offlens = ourlens + (rend-rstart);
1691   if (!rank) {
1692     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1693     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1694     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1695     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1696     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1697     PetscFree(sndcounts);
1698   }
1699   else {
1700     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1701   }
1702 
1703   if (!rank) {
1704     /* calculate the number of nonzeros on each processor */
1705     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1706     PetscMemzero(procsnz,size*sizeof(int));
1707     for ( i=0; i<size; i++ ) {
1708       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1709         procsnz[i] += rowlengths[j];
1710       }
1711     }
1712     PetscFree(rowlengths);
1713 
1714     /* determine max buffer needed and allocate it */
1715     maxnz = 0;
1716     for ( i=0; i<size; i++ ) {
1717       maxnz = PetscMax(maxnz,procsnz[i]);
1718     }
1719     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1720 
1721     /* read in my part of the matrix column indices  */
1722     nz = procsnz[0];
1723     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1724     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1725 
1726     /* read in everyone else's column indices and ship them off */
1727     for ( i=1; i<size; i++ ) {
1728       nz = procsnz[i];
1729       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1730       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1731     }
1732     PetscFree(cols);
1733   }
1734   else {
1735     /* determine buffer space needed for message */
1736     nz = 0;
1737     for ( i=0; i<m; i++ ) {
1738       nz += ourlens[i];
1739     }
1740     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1741 
1742     /* receive message of column indices */
1743     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1744     MPI_Get_count(&status,MPI_INT,&maxnz);
1745     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file: wrong number of column indices received");
1746   }
1747 
1748   /* loop over local rows, determining number of off-diagonal entries */
1749   PetscMemzero(offlens,m*sizeof(int));
1750   jj = 0;
1751   for ( i=0; i<m; i++ ) {
1752     for ( j=0; j<ourlens[i]; j++ ) {
1753       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1754       jj++;
1755     }
1756   }
1757 
1758   /* create our matrix, preallocating with the diagonal (ourlens - offlens) and off-diagonal (offlens) row lengths */
1759   for ( i=0; i<m; i++ ) {
1760     ourlens[i] -= offlens[i];
1761   }
1762   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1763   A = *newmat;
1764   MatSetOption(A,MAT_COLUMNS_SORTED);
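  /* restore ourlens to the full row lengths for the MatSetValues() loops below */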
1765   for ( i=0; i<m; i++ ) {
1766     ourlens[i] += offlens[i];
1767   }
1768 
1769   if (!rank) {
1770     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1771 
1772     /* read in my part of the matrix numerical values  */
1773     nz = procsnz[0];
1774     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1775 
1776     /* insert into matrix */
1777     jj      = rstart;
1778     smycols = mycols;
1779     svals   = vals;
1780     for ( i=0; i<m; i++ ) {
1781       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1782       smycols += ourlens[i];
1783       svals   += ourlens[i];
1784       jj++;
1785     }
1786 
1787     /* read in the other processors' values and ship them out */
1788     for ( i=1; i<size; i++ ) {
1789       nz = procsnz[i];
1790       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1791       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1792     }
1793     PetscFree(procsnz);
1794   }
1795   else {
1796     /* receive numeric values */
1797     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1798 
1799     /* receive message of values */
1800     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1801     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1802     if (maxnz != nz) SETERRQ(1,0,"something is wrong with the file: wrong number of values received");
1803 
1804     /* insert into matrix */
1805     jj      = rstart;
1806     smycols = mycols;
1807     svals   = vals;
1808     for ( i=0; i<m; i++ ) {
1809       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1810       smycols += ourlens[i];
1811       svals   += ourlens[i];
1812       jj++;
1813     }
1814   }
1815   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1816 
1817   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1818   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1819   return 0;
1820 }
1821