xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 52c87ff2747de67dfdd6ccb0bd18d7d7f8d26768)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.212 1997/08/07 14:39:24 bsmith Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/vec/vecimpl.h"
8 #include "src/inline/spops.h"
9 
/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private" /* ADIC Ignore */
/*
   Builds aij->colmap so that for every global column g that appears in the
   off-diagonal block B, colmap[g] = (local column index in B) + 1.
   All other entries remain 0, so callers use "colmap[g] - 1 < 0" to detect
   a column not present in B.  Storage is of global length aij->N.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  /* +1 guards against a zero-length allocation when N == 0 */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  /* garray[i] is the global column number of local column i of B;
     store i+1 so that 0 can serve as the "not present" sentinel */
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
29 
30 extern int DisAssemble_MPIAIJ(Mat);
31 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv):
   inlined insertion of a single (row,col,value) into the diagonal
   block's AIJ storage.  Expects rp, ap, rmax, nrow, _i, ii, N, col1,
   low, high, t, aimax, ai, ailen, aj, aa, a, A, nonew and shift to be
   in scope (see MatSetValues_MPIAIJ).  If the entry exists it is added
   to or overwritten according to addv; otherwise it is inserted,
   growing the row's storage by CHUNKSIZE entries when full.
   NOTE(review): the binary search below compares rp[t] against col
   (not the shift-adjusted col1) and its result (low/high) is never
   used -- the linear scan that follows starts at _i = 0.  The search
   is effectively dead code; confirm before relying on it.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      /* linear scan for the column; insert point is _i on fallthrough */ \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one buffer holding a, j and i arrays */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past this one shift by CHUNKSIZE */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
103 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv):
   identical in structure to MatSetValues_SeqAIJ_A_Private but operates
   on the off-diagonal block's arrays (b, bi, bj, ba, bimax, bilen, B).
   NOTE(review): as in the A variant, the binary search compares against
   col (not col1) and its result is unused -- the scan starts at _i = 0.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
       /* linear scan for the column; insert point is _i on fallthrough */ \
       for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one buffer holding a, j and i arrays */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past this one shift by CHUNKSIZE */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
        PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
 \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
174 
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds an m-by-n logically dense block of values v at global
   rows im[] and global columns in[].  Locally owned rows are routed to
   the diagonal block (columns in [cstart,cend)) or the off-diagonal
   block (all other columns) via the inlined macros above; rows owned by
   other processors are stashed for communication at assembly time.
   v is read row-major when aij->roworiented, column-major otherwise.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    /* bounds checks compiled in only for debug builds */
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned by this processor */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          /* off-diagonal block: translate the global column to B's local
             numbering; a new column after assembly forces disassembly */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1; /* -1 undoes the 1-based sentinel */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* row belongs to another processor: stash for assembly, unless
         the user disabled stashing */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  return 0;
}
258 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values at global rows idxm[] and global
   columns idxn[] into v (row-major).  Only rows owned by this processor
   may be requested; off-diagonal columns not stored locally yield 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          /* off-diagonal block: map global column into B's local numbering */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* column absent from B (or stale colmap entry): value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  return 0;
}
297 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: checks that all processors agree on INSERT vs ADD,
   then sends every stashed (row,col,value) triple to the processor that
   owns the row, using non-blocking MPI.  The triples travel as three
   Scalars per entry (indices are stored as Scalars to allow a single
   message).  Outstanding requests and buffers are recorded in aij for
   MatAssemblyEnd_MPIAIJ to complete.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* owners[] holds the row-ownership ranges; find the owning rank */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row, col, value); starts[] is advanced as we pack */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (packing above destroyed it) for the sends */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
398 extern int MatSetUpMultiply_MPIAIJ(Mat);
399 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the receives posted by
   MatAssemblyBegin_MPIAIJ, inserting each incoming (row,col,value)
   triple into the local diagonal or off-diagonal block, then waits on
   the sends, assembles A and B, and synchronizes disassembly state
   across all processors.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3; /* three Scalars (row, col, value) per entry */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1; /* -1 undoes the 1-based sentinel */
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new off-diagonal column: disassemble and use the global index */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  /* cached row values (MatGetRow workspace) are invalid after assembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}
471 
472 #undef __FUNC__
473 #define __FUNC__ "MatZeroEntries_MPIAIJ"
474 int MatZeroEntries_MPIAIJ(Mat A)
475 {
476   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
477   int        ierr;
478   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
479   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
480   return 0;
481 }
482 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zeros the global rows listed in is, optionally placing *diag on the
   diagonal of the diagonal block.  Each processor may name any global
   row; row indices are routed to their owners with non-blocking MPI,
   then each processor zeros its own rows via MatZeroRows on A and B.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* locate the processor owning row idx via the ownership ranges */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack row indices by destination; starts[] is advanced while packing */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (destroyed by the packing above) for the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank]; /* first global row owned by this processor */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base; /* convert to local row numbers */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  /* diagonal values only go into the diagonal block; B gets plain zeros */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
613 
614 #undef __FUNC__
615 #define __FUNC__ "MatMult_MPIAIJ"
616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
617 {
618   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
619   int        ierr,nt;
620 
621   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
622   if (nt != a->n) {
623     SETERRQ(1,0,"Incompatible partition of A and xx");
624   }
625   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
626   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
627   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
628   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
629   return 0;
630 }
631 
632 #undef __FUNC__
633 #define __FUNC__ "MatMultAdd_MPIAIJ"
634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
635 {
636   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
637   int        ierr;
638   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
639   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
640   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
641   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
642   return 0;
643 }
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatMultTrans_MPIAIJ"
647 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
648 {
649   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
650   int        ierr;
651 
652   /* do nondiagonal part */
653   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
654   /* send it on its way */
655   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
656   /* do local part */
657   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
658   /* receive remote parts: note this assumes the values are not actually */
659   /* inserted in yy until the next line, which is true for my implementation*/
660   /* but is not perhaps always true. */
661   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
662   return 0;
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
667 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   /* do nondiagonal part */
673   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
674   /* send it on its way */
675   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
676   /* do local part */
677   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
678   /* receive remote parts: note this assumes the values are not actually */
679   /* inserted in yy until the next line, which is true for my implementation*/
680   /* but is not perhaps always true. */
681   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
682   return 0;
683 }
684 
685 /*
686   This only works correctly for square matrices where the subblock A->A is the
687    diagonal block
688 */
689 #undef __FUNC__
690 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
691 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
692 {
693   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
694   if (a->M != a->N)
695     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
696   if (a->rstart != a->cstart || a->rend != a->cend) {
697     SETERRQ(1,0,"row partition must equal col partition");  }
698   return MatGetDiagonal(a->A,v);
699 }
700 
701 #undef __FUNC__
702 #define __FUNC__ "MatScale_MPIAIJ"
703 int MatScale_MPIAIJ(Scalar *aa,Mat A)
704 {
705   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
706   int        ierr;
707   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
708   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
709   return 0;
710 }
711 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ" /* ADIC Ignore */
/*
   Frees all storage owned by an MPIAIJ matrix: the stash, the ownership
   table, both sequential blocks, and the optional structures (colmap,
   garray, ghost vector, scatter context, row-value workspace) that are
   only created after first use, then the header itself.
*/
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  /* these are created lazily and may be absent */
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
740 
741 #undef __FUNC__
742 #define __FUNC__ "MatView_MPIAIJ_Binary" /* ADIC Ignore */
743 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
744 {
745   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
746   int         ierr;
747 
748   if (aij->size == 1) {
749     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
750   }
751   else SETERRQ(1,0,"Only uniprocessor output supported");
752   return 0;
753 }
754 
755 #undef __FUNC__
756 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" /* ADIC Ignore */
757 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
758 {
759   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
760   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
761   int         ierr, format,shift = C->indexshift,rank;
762   FILE        *fd;
763   ViewerType  vtype;
764 
765   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
766   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
767     ierr = ViewerGetFormat(viewer,&format);
768     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
769       MatInfo info;
770       int     flg;
771       MPI_Comm_rank(mat->comm,&rank);
772       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
773       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
774       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
775       PetscSequentialPhaseBegin(mat->comm,1);
776       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
777          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
778       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
779          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
780       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
781       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
782       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
783       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
784       fflush(fd);
785       PetscSequentialPhaseEnd(mat->comm,1);
786       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
787       return 0;
788     }
789     else if (format == VIEWER_FORMAT_ASCII_INFO) {
790       return 0;
791     }
792   }
793 
794   if (vtype == DRAW_VIEWER) {
795     Draw       draw;
796     PetscTruth isnull;
797     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
798     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
799   }
800 
801   if (vtype == ASCII_FILE_VIEWER) {
802     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
803     PetscSequentialPhaseBegin(mat->comm,1);
804     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
805            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
806            aij->cend);
807     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
808     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
809     fflush(fd);
810     PetscSequentialPhaseEnd(mat->comm,1);
811   }
812   else {
813     int size = aij->size;
814     rank = aij->rank;
815     if (size == 1) {
816       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
817     }
818     else {
819       /* assemble the entire matrix onto first processor. */
820       Mat         A;
821       Mat_SeqAIJ *Aloc;
822       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
823       Scalar      *a;
824 
825       if (!rank) {
826         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
827                CHKERRQ(ierr);
828       }
829       else {
830         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
831                CHKERRQ(ierr);
832       }
833       PLogObjectParent(mat,A);
834 
835       /* copy over the A part */
836       Aloc = (Mat_SeqAIJ*) aij->A->data;
837       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
838       row = aij->rstart;
839       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
840       for ( i=0; i<m; i++ ) {
841         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
842         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
843       }
844       aj = Aloc->j;
845       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
846 
847       /* copy over the B part */
848       Aloc = (Mat_SeqAIJ*) aij->B->data;
849       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
850       row = aij->rstart;
851       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
852       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
853       for ( i=0; i<m; i++ ) {
854         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
855         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
856       }
857       PetscFree(ct);
858       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
859       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
860       if (!rank) {
861         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
862       }
863       ierr = MatDestroy(A); CHKERRQ(ierr);
864     }
865   }
866   return 0;
867 }
868 
869 #undef __FUNC__
870 #define __FUNC__ "MatView_MPIAIJ" /* ADIC Ignore */
871 int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
872 {
873   Mat         mat = (Mat) obj;
874   int         ierr;
875   ViewerType  vtype;
876 
877   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
878   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
879       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
880     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
881   }
882   else if (vtype == BINARY_FILE_VIEWER) {
883     return MatView_MPIAIJ_Binary(mat,viewer);
884   }
885   return 0;
886 }
887 
888 /*
889     This has to provide several versions.
890 
891      2) a) use only local smoothing updating outer values only once.
892         b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
894 */
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - local SOR/Gauss-Seidel relaxation for the MPIAIJ format.

   Only the "local" sweep variants are supported: each process relaxes its
   own rows using the diagonal block A, treating the off-process values
   (gathered once into mat->lvec at the start) as frozen during the inner
   iterations.  With SOR_ZERO_INITIAL_GUESS the off-process contribution is
   zero, so the work is delegated entirely to the sequential relax routine
   of the diagonal block.

   Input:  matin - the matrix; bb - right-hand side; omega - relaxation
           factor; flag - which sweep(s) to perform; fshift - added to the
           diagonal; its - number of sweeps.
   Output: xx - the iterate, updated in place.

   NOTE(review): SPARSEDENSEMDOT (src/inline/spops.h) appears to accumulate
   sum -= v[k]*x[idx[k]] over the row, including the diagonal term; the
   update below adds A->a[diag[i]]*x[i] back to compensate — confirm against
   the macro definition before modifying.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  /* xs/ls are pre-shifted so 1-based column indices (shift = -1) index
     the arrays correctly without per-element adjustment */
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* make sure the diagonal entries of A are located */
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* no off-process contribution with a zero guess: pure local relax */
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    /* gather the needed off-process values of xx into mat->lvec once;
       they are held fixed for all "its" sweeps below */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        /* row i of the diagonal block A */
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        /* row i of the off-diagonal block B, dotted with the ghost values */
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* forward sweep only: ascending row order */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                            mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                            mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      /* backward sweep only: descending row order */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    /* true (global) SOR and Eisenstat variants are not implemented here */
    SETERRQ(1,0,"Option not supported");
  }
  return 0;
}
1004 
1005 #undef __FUNC__
1006 #define __FUNC__ "MatGetInfo_MPIAIJ" /* ADIC Ignore */
1007 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1008 {
1009   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010   Mat        A = mat->A, B = mat->B;
1011   int        ierr;
1012   double     isend[5], irecv[5];
1013 
1014   info->rows_global    = (double)mat->M;
1015   info->columns_global = (double)mat->N;
1016   info->rows_local     = (double)mat->m;
1017   info->columns_local  = (double)mat->N;
1018   info->block_size     = 1.0;
1019   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1020   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1021   isend[3] = info->memory;  isend[4] = info->mallocs;
1022   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1023   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1024   isend[3] += info->memory;  isend[4] += info->mallocs;
1025   if (flag == MAT_LOCAL) {
1026     info->nz_used      = isend[0];
1027     info->nz_allocated = isend[1];
1028     info->nz_unneeded  = isend[2];
1029     info->memory       = isend[3];
1030     info->mallocs      = isend[4];
1031   } else if (flag == MAT_GLOBAL_MAX) {
1032     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1033     info->nz_used      = irecv[0];
1034     info->nz_allocated = irecv[1];
1035     info->nz_unneeded  = irecv[2];
1036     info->memory       = irecv[3];
1037     info->mallocs      = irecv[4];
1038   } else if (flag == MAT_GLOBAL_SUM) {
1039     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1040     info->nz_used      = irecv[0];
1041     info->nz_allocated = irecv[1];
1042     info->nz_unneeded  = irecv[2];
1043     info->memory       = irecv[3];
1044     info->mallocs      = irecv[4];
1045   }
1046   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1047   info->fill_ratio_needed = 0;
1048   info->factor_mallocs    = 0;
1049 
1050   return 0;
1051 }
1052 
1053 #undef __FUNC__
1054 #define __FUNC__ "MatSetOption_MPIAIJ" /* ADIC Ignore */
1055 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1056 {
1057   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1058 
1059   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1060       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1061       op == MAT_COLUMNS_UNSORTED ||
1062       op == MAT_COLUMNS_SORTED ||
1063       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1064       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1065         MatSetOption(a->A,op);
1066         MatSetOption(a->B,op);
1067   } else if (op == MAT_ROW_ORIENTED) {
1068     a->roworiented = 1;
1069     MatSetOption(a->A,op);
1070     MatSetOption(a->B,op);
1071   } else if (op == MAT_ROWS_SORTED ||
1072              op == MAT_ROWS_UNSORTED ||
1073              op == MAT_SYMMETRIC ||
1074              op == MAT_STRUCTURALLY_SYMMETRIC ||
1075              op == MAT_YES_NEW_DIAGONALS)
1076     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1077   else if (op == MAT_COLUMN_ORIENTED) {
1078     a->roworiented = 0;
1079     MatSetOption(a->A,op);
1080     MatSetOption(a->B,op);
1081   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1082     a->donotstash = 1;
1083   } else if (op == MAT_NO_NEW_DIAGONALS)
1084     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
1085   else
1086     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1087   return 0;
1088 }
1089 
1090 #undef __FUNC__
1091 #define __FUNC__ "MatGetSize_MPIAIJ" /* ADIC Ignore */
1092 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1093 {
1094   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1095   *m = mat->M; *n = mat->N;
1096   return 0;
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetLocalSize_MPIAIJ" /* ADIC Ignore */
1101 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104   *m = mat->m; *n = mat->N;
1105   return 0;
1106 }
1107 
1108 #undef __FUNC__
1109 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" /* ADIC Ignore */
1110 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1111 {
1112   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1113   *m = mat->rstart; *n = mat->rend;
1114   return 0;
1115 }
1116 
1117 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1118 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1119 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - returns one locally-owned row of the parallel matrix
   with global column indices, merged in increasing column order from the
   two sequential blocks.

   Input:  matin - the matrix; row - GLOBAL row number (must be owned
           locally); idx and/or v may be PETSC_NULL if not wanted.
   Output: nz - number of nonzeros in the row; idx - global column indices;
           v - values.  idx/v point into per-matrix scratch buffers, so only
           one row may be "gotten" at a time (enforced via getrowactive);
           MatRestoreRow_MPIAIJ() releases the row.

   Merge layout: the off-diagonal block B holds columns both left and right
   of the diagonal block; its first nzB-imark entries (after mapping through
   garray) fall left of cstart, then come all nzA entries of A (offset by
   cstart), then B's remaining entries.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  /* the scratch buffers hold only one row at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices packed behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  /* note: SETERRQ expands to a block+return, so no semicolon needed here */
  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates B's local column numbers to global ones */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        /* reuse the split point found above, or compute it now if v==0 */
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* the merged copies are in our scratch space; release the block rows */
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}
1200 
1201 #undef __FUNC__
1202 #define __FUNC__ "MatRestoreRow_MPIAIJ" /* ADIC Ignore */
1203 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1204 {
1205   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1206   if (aij->getrowactive == PETSC_FALSE) {
1207     SETERRQ(1,0,"MatGetRow not called");
1208   }
1209   aij->getrowactive = PETSC_FALSE;
1210   return 0;
1211 }
1212 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - computes a matrix norm of the parallel AIJ matrix.

   Supports NORM_FROBENIUS (sum of squares reduced with MPI_SUM, then
   sqrt), NORM_1 (max column sum: per-column absolute sums are accumulated
   into a length-N array and reduced with MPI_SUM — not scalable in N),
   and NORM_INFINITY (max row sum, reduced with MPI_MAX).  The two-norm
   is not supported.  On one process the work is delegated to the
   sequential block A.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares of all local entries, in both blocks */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      /* per-global-column absolute sums; length N, so not scalable */
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A block: local columns map to global by adding cstart */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B block: local columns map to global through garray */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* each local row is fully owned, so only a max reduction is needed */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  return 0;
}
1289 
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - forms the transpose of a parallel AIJ matrix.

   Input:  A - the matrix; matout - address for the transpose, or
           PETSC_NULL for "in-place" (square matrices only).
   The transpose is built by inserting each local row of A as a column
   of a new N x M matrix B via MatSetValues (which routes off-process
   entries through the stash).  The in-place variant actually builds a
   new matrix, destroys A's innards, and copies B's header over A.

   Implementation note: the column indices Aloc->j of the diagonal block
   are temporarily shifted in place to global numbering, then restored —
   the matrix must not be used concurrently.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,0,"Square matrix only for in-place");
  /* transpose has reversed global dimensions N x M */
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* shift local column numbers to global, in place (restored below) */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row "row" of A as column "row" of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* off-diagonal columns are translated through garray into a scratch array */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    /* steal B's body for A, then discard B's header */
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}
1348 
1349 #undef __FUNC__
1350 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1351 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1352 {
1353   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1354   Mat a = aij->A, b = aij->B;
1355   int ierr,s1,s2,s3;
1356 
1357   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1358   if (rr) {
1359     s3 = aij->n;
1360     VecGetLocalSize_Fast(rr,s1);
1361     if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
1362     /* Overlap communication with computation. */
1363     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1364   }
1365   if (ll) {
1366     VecGetLocalSize_Fast(ll,s1);
1367     if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
1368     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1369   }
1370   /* scale  the diagonal block */
1371   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1372 
1373   if (rr) {
1374     /* Do a scatter end and then right scale the off-diagonal block */
1375     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1376     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1377   }
1378 
1379   return 0;
1380 }
1381 
1382 
1383 extern int MatPrintHelp_SeqAIJ(Mat);
1384 #undef __FUNC__
1385 #define __FUNC__ "MatPrintHelp_MPIAIJ" /* ADIC Ignore */
1386 int MatPrintHelp_MPIAIJ(Mat A)
1387 {
1388   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1389 
1390   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1391   else return 0;
1392 }
1393 
1394 #undef __FUNC__
1395 #define __FUNC__ "MatGetBlockSize_MPIAIJ" /* ADIC Ignore */
1396 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1397 {
1398   *bs = 1;
1399   return 0;
1400 }
1401 #undef __FUNC__
1402 #define __FUNC__ "MatSetUnfactored_MPIAIJ" /* ADIC Ignore */
1403 int MatSetUnfactored_MPIAIJ(Mat A)
1404 {
1405   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1406   int        ierr;
1407   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1408   return 0;
1409 }
1410 
1411 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1412 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1413 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1414 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1415 /* -------------------------------------------------------------------*/
1416 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1417        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1418        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1419        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1420        0,0,
1421        0,0,
1422        0,0,
1423        MatRelax_MPIAIJ,
1424        MatTranspose_MPIAIJ,
1425        MatGetInfo_MPIAIJ,0,
1426        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1427        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1428        0,
1429        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1430        0,0,0,0,
1431        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1432        0,0,
1433        0,0,MatConvertSameType_MPIAIJ,0,0,
1434        0,0,0,
1435        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1436        MatPrintHelp_MPIAIJ,
1437        MatScale_MPIAIJ,0,0,0,
1438        MatGetBlockSize_MPIAIJ,0,0,0,0,
1439        MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ};
1440 
1441 
1442 #undef __FUNC__
1443 #define __FUNC__ "MatCreateMPIAIJ"
1444 /*@C
1445    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1446    (the default parallel PETSc format).  For good matrix assembly performance
1447    the user should preallocate the matrix storage by setting the parameters
1448    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1449    performance can be increased by more than a factor of 50.
1450 
1451    Input Parameters:
1452 .  comm - MPI communicator
1453 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1454            This value should be the same as the local size used in creating the
1455            y vector for the matrix-vector product y = Ax.
1456 .  n - This value should be the same as the local size used in creating the
1457        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1458        calculated if N is given) For square matrices n is almost always m.
1459 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1460 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1461 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1462            (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
1464            diagonal portion of the local submatrix (possibly different for each row)
1465            or PETSC_NULL. You must leave room for the diagonal entry even if
1466            it is zero.
1467 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1468            submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
1470            off-diagonal portion of the local submatrix (possibly different for
1471            each row) or PETSC_NULL.
1472 
1473    Output Parameter:
1474 .  A - the matrix
1475 
1476    Notes:
1477    The AIJ format (also called the Yale sparse matrix format or
1478    compressed row storage), is fully compatible with standard Fortran 77
1479    storage.  That is, the stored row and column indices can begin at
1480    either one (as in Fortran) or zero.  See the users manual for details.
1481 
1482    The user MUST specify either the local or global matrix dimensions
1483    (possibly both).
1484 
1485    By default, this format uses inodes (identical nodes) when possible.
1486    We search for consecutive rows with the same nonzero structure, thereby
1487    reusing matrix information to achieve increased efficiency.
1488 
1489    Options Database Keys:
1490 $    -mat_aij_no_inode  - Do not use inodes
1491 $    -mat_aij_inode_limit <limit> - Set inode limit.
1492 $        (max limit=5)
1493 $    -mat_aij_oneindex - Internally use indexing starting at 1
1494 $        rather than 0.  Note: When calling MatSetValues(),
1495 $        the user still MUST index entries starting at 0!
1496 
1497    Storage Information:
1498    For a square global matrix we define each processor's diagonal portion
1499    to be its local rows and the corresponding columns (a square submatrix);
1500    each processor's off-diagonal portion encompasses the remainder of the
1501    local matrix (a rectangular submatrix).
1502 
1503    The user can specify preallocated storage for the diagonal part of
1504    the local submatrix with either d_nz or d_nnz (not both).  Set
1505    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1506    memory allocation.  Likewise, specify preallocated storage for the
1507    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1508 
1509    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1510    the figure below we depict these three local rows and all columns (0-11).
1511 
1512 $          0 1 2 3 4 5 6 7 8 9 10 11
1513 $         -------------------
1514 $  row 3  |  o o o d d d o o o o o o
1515 $  row 4  |  o o o d d d o o o o o o
1516 $  row 5  |  o o o d d d o o o o o o
1517 $         -------------------
1518 $
1519 
1520    Thus, any entries in the d locations are stored in the d (diagonal)
1521    submatrix, and any entries in the o locations are stored in the
1522    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1523    stored simply in the MATSEQAIJ format for compressed row storage.
1524 
1525    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1526    and o_nz should indicate the number of nonzeros per row in the o matrix.
1527    In general, for PDE problems in which most nonzeros are near the diagonal,
1528    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1529    or you will get TERRIBLE performance; see the users' manual chapter on
1530    matrices.
1531 
1532 .keywords: matrix, aij, compressed row, sparse, parallel
1533 
1534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1535 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  /* on one process just build a sequential AIJ matrix */
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    return 0;
  }

  /* collective from here on: every process must call with compatible sizes */
  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  /* install the MPIAIJ function table defined above */
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation arrays require a known local row count */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* determine missing global sizes by summing the local ones */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* missing local sizes: split evenly, the first (M % size) ranks get one extra */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  /* cowners shares the allocation, starting size+2 ints in */
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's m, then prefix-sum so rowners[p] is the
     first global row owned by process p (rowners[1] is already rank 0's m) */
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for the column ownership */
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* diagonal block: m x n sequential AIJ */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  /* off-diagonal block: m x N (full global width; columns compacted later) */
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
1628 
1629 #undef __FUNC__
1630 #define __FUNC__ "MatConvertSameType_MPIAIJ"
1631 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1632 {
1633   Mat        mat;
1634   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1635   int        ierr, len=0, flg;
1636 
1637   *newmat       = 0;
1638   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1639   PLogObjectCreate(mat);
1640   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1641   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1642   mat->destroy    = MatDestroy_MPIAIJ;
1643   mat->view       = MatView_MPIAIJ;
1644   mat->factor     = matin->factor;
1645   mat->assembled  = PETSC_TRUE;
1646 
1647   a->m = mat->m   = oldmat->m;
1648   a->n = mat->n   = oldmat->n;
1649   a->M = mat->M   = oldmat->M;
1650   a->N = mat->N   = oldmat->N;
1651 
1652   a->rstart       = oldmat->rstart;
1653   a->rend         = oldmat->rend;
1654   a->cstart       = oldmat->cstart;
1655   a->cend         = oldmat->cend;
1656   a->size         = oldmat->size;
1657   a->rank         = oldmat->rank;
1658   mat->insertmode = NOT_SET_VALUES;
1659   a->rowvalues    = 0;
1660   a->getrowactive = PETSC_FALSE;
1661 
1662   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1663   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1664   a->cowners = a->rowners + a->size + 2;
1665   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1666   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1667   if (oldmat->colmap) {
1668     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1669     PLogObjectMemory(mat,(a->N)*sizeof(int));
1670     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1671   } else a->colmap = 0;
1672   if (oldmat->garray) {
1673     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1674     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1675     PLogObjectMemory(mat,len*sizeof(int));
1676     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1677   } else a->garray = 0;
1678 
1679   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1680   PLogObjectParent(mat,a->lvec);
1681   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1682   PLogObjectParent(mat,a->Mvctx);
1683   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1684   PLogObjectParent(mat,a->A);
1685   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1686   PLogObjectParent(mat,a->B);
1687   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1688   if (flg) {
1689     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1690   }
1691   *newmat = mat;
1692   return 0;
1693 }
1694 
1695 #include "sys.h"
1696 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a parallel AIJ matrix from a PETSc binary viewer.

   Process 0 reads the entire file (header, row lengths, column indices,
   numerical values) and ships each remote processor exactly its own rows
   via MPI_Scatterv/MPI_Send; every processor then inserts its local rows
   with MatSetValues() and the matrix is assembled.

   Input Parameters:
.  viewer - binary viewer holding the matrix; only process 0 accesses the
            file descriptor
.  type   - not consulted in this routine

   Output Parameter:
.  newmat - the newly created MPIAIJ matrix

   File layout read here: header[0..3] = {MAT_COOKIE, M, N, <unused here>},
   followed by the M row lengths, all column indices, then all values.
   (header[3] is read but never referenced below.)
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; validate the cookie before anything else */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
  }

  /* broadcast the global sizes (header[1..3]) to all processors */
  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: the first (M % size) processors get one
     extra row; rowners[] becomes the prefix-sum ownership table */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the same
     allocation (second half) and is filled in later with the number of
     off-diagonal entries per local row */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the scatter displacement array, being a prefix sum */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    /* non-root ranks: send-side arguments of MPI_Scatterv are ignored */
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    /* sanity check: message length must equal the sum of our row lengths */
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries.
     NOTE(review): columns are tested against the ROW ownership range
     [rstart,rend); this matches the diagonal-block column range only when
     the row and column layouts coincide -- confirm for rectangular cases */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix with exact preallocation: ourlens[] is temporarily
     reduced to the diagonal-block counts, then restored afterwards so it
     can serve as the per-row insertion lengths below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* file stores each row's columns in ascending order, so enable the
     sorted-columns fast path for MatSetValues() */
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values; nz still holds our total nonzero count */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  /* ourlens also releases offlens (shared allocation) */
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1863