xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision af2d14d21c1ae1f12e83141719c13b01f0eba9cd)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.286 1999/03/09 21:32:49 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non-scalable way since the
19 length of colmap equals the global number of columns.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i;
28 #if defined (USE_CTABLE)
29   int        ierr;
30 #endif
31 
32   PetscFunctionBegin;
33 #if defined (USE_CTABLE)
34   ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr);
35   for ( i=0; i<n; i++ ){
36     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
37   }
38 #else
39   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
40   PLogObjectMemory(mat,aij->N*sizeof(int));
41   PetscMemzero(aij->colmap,aij->N*sizeof(int));
42   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
43 #endif
44   PetscFunctionReturn(0);
45 }
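/*
   Illustrative sketch (comment only, not compiled code): once colmap has been
   built by CreateColmap_MPIAIJ_Private(), a global column number gcol that
   lies in the off-diagonal part can be translated to a local column of aij->B
   roughly as follows (non-CTABLE case; the CTABLE branch uses TableFind()
   with the same +1/-1 convention):

       int lcol = aij->colmap[gcol] - 1;
       if (lcol < 0) {
          ... gcol is not present in the off-diagonal part ...
       }

   Storing i+1 (and subtracting 1 on lookup) lets 0 mean "column not present",
   which is why the array is cleared with PetscMemzero() above.
*/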
46 
47 #define CHUNKSIZE   15
48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
49 { \
50  \
51     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
52     rmax = aimax[row]; nrow = ailen[row];  \
53     col1 = col - shift; \
54      \
55     low = 0; high = nrow; \
56     while (high-low > 5) { \
57       t = (low+high)/2; \
58       if (rp[t] > col1) high = t; \
59       else             low  = t; \
60     } \
61       for ( _i=low; _i<nrow; _i++ ) { \
62         if (rp[_i] > col1) break; \
63         if (rp[_i] == col1) { \
64           if (addv == ADD_VALUES) ap[_i] += value;   \
65           else                  ap[_i] = value; \
66           goto a_noinsert; \
67         } \
68       }  \
69       if (nonew == 1) goto a_noinsert; \
70       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
71       if (nrow >= rmax) { \
72         /* there is no extra room in row, therefore enlarge */ \
73         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
74         Scalar *new_a; \
75  \
76         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
77  \
78         /* malloc new storage space */ \
79         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
80         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
81         new_j   = (int *) (new_a + new_nz); \
82         new_i   = new_j + new_nz; \
83  \
84         /* copy over old data into new slots */ \
85         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
86         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
87         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
88         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
89         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
90                                                            len*sizeof(int)); \
91         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
92         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
93                                                            len*sizeof(Scalar));  \
94         /* free up old matrix storage */ \
95  \
96         PetscFree(a->a);  \
97         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col1) high = t; \
130       else             low  = t; \
131     } \
132       for ( _i=low; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int)); \
162         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
163         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));  \
165         /* free up old matrix storage */ \
166  \
167         PetscFree(b->a);  \
168         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
169         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
170         b->singlemalloc = 1; \
171  \
172         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
173         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
174         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
175         b->maxnz += CHUNKSIZE; \
176         b->reallocs++; \
177       } \
178       N = nrow++ - 1; b->nz++; \
179       /* shift up all the later entries in this row */ \
180       for ( ii=N; ii>=_i; ii-- ) { \
181         rp[ii+1] = rp[ii]; \
182         ap[ii+1] = ap[ii]; \
183       } \
184       rp[_i] = col1;  \
185       ap[_i] = value;  \
186       b_noinsert: ; \
187       bilen[row] = nrow; \
188 }
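/*
   Note on the two macros above: MatSetValues_SeqAIJ_A_Private() and
   MatSetValues_SeqAIJ_B_Private() are textually identical except that one
   updates the "diagonal" sequential block aij->A and the other the
   "off-diagonal" block aij->B.  For each (row,col,value) they roughly:

     1) locate the row segment rp = aj + ai[row] (plus the optional Fortran
        index shift) and scan it for col;
     2) if col is already present, add to or overwrite the stored value
        according to addv;
     3) otherwise, if new nonzeros are allowed (nonew), reallocate the whole
        matrix enlarged by CHUNKSIZE when the row is full, shift the trailing
        entries of the row up by one, and insert (col,value).

   This mirrors MatSetValues_SeqAIJ(); the macros exist to avoid a function
   call per inserted entry (see the commented-out calls below).
*/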
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIAIJ"
192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
195   Scalar     value;
196   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
197   int        cstart = aij->cstart, cend = aij->cend,row,col;
198   int        roworiented = aij->roworiented;
199 
200   /* Some Variables required in the macro */
201   Mat        A = aij->A;
202   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
203   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
204   Scalar     *aa = a->a;
205 
206   Mat        B = aij->B;
207   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
208   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
209   Scalar     *ba = b->a;
210 
211   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
212   int        nonew = a->nonew,shift = a->indexshift;
213   Scalar     *ap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217     if (im[i] < 0) continue;
218 #if defined(USE_PETSC_BOPT_g)
219     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart && im[i] < rend) {
222       row = im[i] - rstart;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart && in[j] < cend){
225           col = in[j] - cstart;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230         else if (in[j] < 0) continue;
231 #if defined(USE_PETSC_BOPT_g)
232         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!aij->colmap) {
237               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (USE_CTABLE)
240             ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
241 	    col--;
242 #else
243             col = aij->colmap[in[j]] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249               B = aij->B;
250               b = (Mat_SeqAIJ *) B->data;
251               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252               ba = b->a;
253             }
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
257           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
258         }
259       }
260     } else {
261       if (roworiented && !aij->donotstash) {
262         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263       } else {
264         if (!aij->donotstash) {
265           row = im[i];
266           for ( j=0; j<n; j++ ) {
267             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
268           }
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
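/*
   Caller-side sketch (comment only; an assumed example, not part of this
   file).  Rows owned by this process go directly into the aij->A or aij->B
   blocks through the macros above; rows owned elsewhere are stashed with
   StashValues_Private() and communicated during assembly:

       Mat    A;
       int    i, rstart, rend, ierr;
       Scalar v = 1.0;

       ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,PETSC_NULL,2,PETSC_NULL,&A);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);
       for ( i=rstart; i<rend; i++ ) {
         ierr = MatSetValues(A,1,&i,1,&i,&v,ADD_VALUES);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   All processes must use the same InsertMode within one assembly; mixing
   INSERT_VALUES and ADD_VALUES is detected in MatAssemblyBegin_MPIAIJ()
   below.
*/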
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr =  StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr);
340   /* Free cache space */
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = StashScatterGetMesg_Private(&aij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled thus we can skip this stuff
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
398   PetscFunctionReturn(0);
399 }
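/*
   Assembly protocol used above (descriptive note): the stashed off-processor
   values are scattered to their owning processes and applied through
   MatSetValues_MPIAIJ(); aij->A is then assembled; all processes agree via
   MPI_Allreduce on whether anyone disassembled, so the compressed column
   space of aij->B stays consistent; on the first final assembly the scatter
   context for MatMult is built by MatSetUpMultiply_MPIAIJ(); finally aij->B
   is assembled and the MatGetRow work space is released.
*/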
400 
401 #undef __FUNC__
402 #define __FUNC__ "MatZeroEntries_MPIAIJ"
403 int MatZeroEntries_MPIAIJ(Mat A)
404 {
405   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
406   int        ierr;
407 
408   PetscFunctionBegin;
409   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
410   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNC__
415 #define __FUNC__ "MatZeroRows_MPIAIJ"
416 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
417 {
418   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
419   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
420   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
421   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
422   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
423   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
424   MPI_Comm       comm = A->comm;
425   MPI_Request    *send_waits,*recv_waits;
426   MPI_Status     recv_status,*send_status;
427   IS             istmp;
428 
429   PetscFunctionBegin;
430   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
431   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
432 
433   /*  first count number of contributors to each processor */
434   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
435   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
436   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
437   for ( i=0; i<N; i++ ) {
438     idx = rows[i];
439     found = 0;
440     for ( j=0; j<size; j++ ) {
441       if (idx >= owners[j] && idx < owners[j+1]) {
442         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
443       }
444     }
445     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
446   }
447   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
448 
449   /* inform other processors of number of messages and max length*/
450   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
451   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
452   nrecvs = work[rank];
453   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
454   nmax = work[rank];
455   PetscFree(work);
456 
457   /* post receives:   */
458   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
459   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
460   for ( i=0; i<nrecvs; i++ ) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
469   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
470   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
471   starts[0] = 0;
472   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
473   for ( i=0; i<N; i++ ) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ISRestoreIndices(is,&rows);
477 
478   starts[0] = 0;
479   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
480   count = 0;
481   for ( i=0; i<size; i++ ) {
482     if (procs[i]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   PetscFree(starts);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]  = n;
500     slen += n;
501     count--;
502   }
503   PetscFree(recv_waits);
504 
505   /* move the data into the send scatter */
506   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
507   count = 0;
508   for ( i=0; i<nrecvs; i++ ) {
509     values = rvalues + i*nmax;
510     for ( j=0; j<lens[i]; j++ ) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   PetscFree(rvalues); PetscFree(lens);
515   PetscFree(owner); PetscFree(nprocs);
516 
517   /* actually zap the local rows */
518   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
519   PLogObjectParent(A,istmp);
520 
521   /*
522         Zero the required rows. If the "diagonal block" of the matrix
523      is square and the user wishes to set the diagonal we use separate
524      code so that MatSetValues() is not called for each diagonal entry,
525      which would allocate new memory and slow things down with many mallocs.
526 
527        Contributed by: Mathew Knepley
528   */
529   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
530   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
531   if (diag && (l->A->M == l->A->N)) {
532     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
533   } else if (diag) {
534     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
535     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
536       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
537 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
538     }
539     for ( i = 0; i < slen; i++ ) {
540       row  = lrows[i] + rstart;
541       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
542     }
543     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
544     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545   } else {
546     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
547   }
548   ierr = ISDestroy(istmp); CHKERRQ(ierr);
549   PetscFree(lrows);
550 
551   /* wait on sends */
552   if (nsends) {
553     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
554     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
555     PetscFree(send_status);
556   }
557   PetscFree(send_waits); PetscFree(svalues);
558 
559   PetscFunctionReturn(0);
560 }
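/*
   Usage sketch for the row-zeroing path above (comment only; sizes and row
   numbers are made up for illustration).  Each process may list arbitrary
   global rows; they are routed to their owners with the send/receive code
   above before the local rows are zeroed:

       IS     is;
       int    rows[2], ierr;
       Scalar diag = 1.0;

       rows[0] = 0; rows[1] = 7;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);
       ierr = MatZeroRows(A,is,&diag);
       ierr = ISDestroy(is);

   Passing a null diagonal pointer zeroes the rows without setting the
   diagonal entries.
*/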
561 
562 #undef __FUNC__
563 #define __FUNC__ "MatMult_MPIAIJ"
564 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
565 {
566   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
567   int        ierr,nt;
568 
569   PetscFunctionBegin;
570   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
571   if (nt != a->n) {
572     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
573   }
574   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
575   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
576   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
577   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
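/*
   The multiply above computes, in effect,

       yy = aij->A * xx  +  aij->B * lvec

   where lvec holds the ghost entries of xx gathered by the VecScatter.  The
   scatter is started before the local product and completed afterwards, so
   communication overlaps the aij->A multiply.
*/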
580 
581 #undef __FUNC__
582 #define __FUNC__ "MatMultAdd_MPIAIJ"
583 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
584 {
585   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
586   int        ierr;
587 
588   PetscFunctionBegin;
589   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
590   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
591   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNC__
597 #define __FUNC__ "MatMultTrans_MPIAIJ"
598 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
599 {
600   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
601   int        ierr;
602 
603   PetscFunctionBegin;
604   /* do nondiagonal part */
605   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
606   /* send it on its way */
607   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
608   /* do local part */
609   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
610   /* receive remote parts: note this assumes the values are not actually */
611   /* inserted in yy until the next line, which is true for this implementation */
612   /* but may not always be the case. */
613   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
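/*
   The transpose multiply above computes, in effect,

       yy = aij->A^T * xx  +  (reverse scatter-add of  aij->B^T * xx)

   i.e. the off-diagonal contribution is formed locally in lvec and then added
   into the owning processes' entries of yy by the reverse scatter.
*/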
616 
617 #undef __FUNC__
618 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
619 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
620 {
621   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
622   int        ierr;
623 
624   PetscFunctionBegin;
625   /* do nondiagonal part */
626   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
627   /* send it on its way */
628   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
629   /* do local part */
630   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
631   /* receive remote parts: note this assumes the values are not actually */
632   /* inserted in zz until the next line, which is true for this implementation */
633   /* but may not always be the case. */
634   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
635   PetscFunctionReturn(0);
636 }
637 
638 /*
639   This only works correctly for square matrices where the subblock A->A is the
640    diagonal block
641 */
642 #undef __FUNC__
643 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
644 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
645 {
646   int        ierr;
647   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
648 
649   PetscFunctionBegin;
650   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
651   if (a->rstart != a->cstart || a->rend != a->cend) {
652     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
653   }
654   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatScale_MPIAIJ"
660 int MatScale_MPIAIJ(Scalar *aa,Mat A)
661 {
662   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
663   int        ierr;
664 
665   PetscFunctionBegin;
666   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
667   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNC__
672 #define __FUNC__ "MatDestroy_MPIAIJ"
673 int MatDestroy_MPIAIJ(Mat mat)
674 {
675   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
676   int        ierr;
677 
678   PetscFunctionBegin;
679   if (--mat->refct > 0) PetscFunctionReturn(0);
680 
681   if (mat->mapping) {
682     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
683   }
684   if (mat->bmapping) {
685     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
686   }
687   if (mat->rmap) {
688     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
689   }
690   if (mat->cmap) {
691     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
692   }
693 #if defined(USE_PETSC_LOG)
694   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
695 #endif
696   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
697   PetscFree(aij->rowners);
698   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
699   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
700 #if defined (USE_CTABLE)
701   if (aij->colmap) TableDelete(aij->colmap);
702 #else
703   if (aij->colmap) PetscFree(aij->colmap);
704 #endif
705   if (aij->garray) PetscFree(aij->garray);
706   if (aij->lvec)   VecDestroy(aij->lvec);
707   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
708   if (aij->rowvalues) PetscFree(aij->rowvalues);
709   PetscFree(aij);
710   PLogObjectDestroy(mat);
711   PetscHeaderDestroy(mat);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNC__
716 #define __FUNC__ "MatView_MPIAIJ_Binary"
717 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
718 {
719   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
720   int         ierr;
721 
722   PetscFunctionBegin;
723   if (aij->size == 1) {
724     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
725   }
726   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNC__
731 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
732 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
733 {
734   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
735   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
736   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
737   int         size = aij->size;
738   FILE        *fd;
739   ViewerType  vtype;
740 
741   PetscFunctionBegin;
742   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
743   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
744     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
745     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
746       MatInfo info;
747       int     flg;
748       MPI_Comm_rank(mat->comm,&rank);
749       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
750       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
751       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
752       PetscSequentialPhaseBegin(mat->comm,1);
753       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
754          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
755       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
756          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
757       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
758       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
759       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
760       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
761       fflush(fd);
762       PetscSequentialPhaseEnd(mat->comm,1);
763       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
764       PetscFunctionReturn(0);
765     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
766       PetscFunctionReturn(0);
767     }
768   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
769     Draw       draw;
770     PetscTruth isnull;
771     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
772     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
773   }
774 
775   if (size == 1) {
776     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
777   } else {
778     /* assemble the entire matrix onto the first processor. */
779     Mat         A;
780     Mat_SeqAIJ *Aloc;
781     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
782     Scalar      *a;
783 
784     if (!rank) {
785       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
786     } else {
787       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
788     }
789     PLogObjectParent(mat,A);
790 
791     /* copy over the A part */
792     Aloc = (Mat_SeqAIJ*) aij->A->data;
793     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
794     row = aij->rstart;
795     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
796     for ( i=0; i<m; i++ ) {
797       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
798       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
799     }
800     aj = Aloc->j;
801     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
802 
803     /* copy over the B part */
804     Aloc = (Mat_SeqAIJ*) aij->B->data;
805     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
806     row = aij->rstart;
807     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
808     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
809     for ( i=0; i<m; i++ ) {
810       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
811       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
812     }
813     PetscFree(ct);
814     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
815     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
816     /*
817        Everyone has to call MatView() to draw the matrix since the graphics waits are
818        synchronized across all processors that share the Draw object
819     */
820     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
821       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
822     }
823     ierr = MatDestroy(A); CHKERRQ(ierr);
824   }
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNC__
829 #define __FUNC__ "MatView_MPIAIJ"
830 int MatView_MPIAIJ(Mat mat,Viewer viewer)
831 {
832   int         ierr;
833   ViewerType  vtype;
834 
835   PetscFunctionBegin;
836   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
837   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
838       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
839     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
840   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
841     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
842   } else {
843     SETERRQ(1,1,"Viewer type not supported by PETSc object");
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849     This has to provide several versions.
850 
851      2) a) use only local smoothing, updating outer values only once.
852         b) local smoothing, updating outer values each inner iteration
853      3) coloring, updating outer values between colors.
854 */
855 #undef __FUNC__
856 #define __FUNC__ "MatRelax_MPIAIJ"
857 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
858                            double fshift,int its,Vec xx)
859 {
860   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
861   Mat        AA = mat->A, BB = mat->B;
862   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
863   Scalar     *b,*x,*xs,*ls,d,*v,sum;
864   int        ierr,*idx, *diag;
865   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
866 
867   PetscFunctionBegin;
868   ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
869   ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
870   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
871   xs = x + shift; /* shift by one for index start of 1 */
872   ls = ls + shift;
873   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
874   diag = A->diag;
875   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
876     if (flag & SOR_ZERO_INITIAL_GUESS) {
877       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
878       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
879       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
880       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
881       PetscFunctionReturn(0);
882     }
883     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
884     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
885     while (its--) {
886       /* go down through the rows */
887       for ( i=0; i<m; i++ ) {
888         n    = A->i[i+1] - A->i[i];
889 	PLogFlops(4*n+3);
890         idx  = A->j + A->i[i] + shift;
891         v    = A->a + A->i[i] + shift;
892         sum  = b[i];
893         SPARSEDENSEMDOT(sum,xs,v,idx,n);
894         d    = fshift + A->a[diag[i]+shift];
895         n    = B->i[i+1] - B->i[i];
896         idx  = B->j + B->i[i] + shift;
897         v    = B->a + B->i[i] + shift;
898         SPARSEDENSEMDOT(sum,ls,v,idx,n);
899         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
900       }
901       /* come up through the rows */
902       for ( i=m-1; i>-1; i-- ) {
903         n    = A->i[i+1] - A->i[i];
904 	PLogFlops(4*n+3);
905         idx  = A->j + A->i[i] + shift;
906         v    = A->a + A->i[i] + shift;
907         sum  = b[i];
908         SPARSEDENSEMDOT(sum,xs,v,idx,n);
909         d    = fshift + A->a[diag[i]+shift];
910         n    = B->i[i+1] - B->i[i];
911         idx  = B->j + B->i[i] + shift;
912         v    = B->a + B->i[i] + shift;
913         SPARSEDENSEMDOT(sum,ls,v,idx,n);
914         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
915       }
916     }
917   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
918     if (flag & SOR_ZERO_INITIAL_GUESS) {
919       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
920       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
921       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
922       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
923       PetscFunctionReturn(0);
924     }
925     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
926     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
927     while (its--) {
928       for ( i=0; i<m; i++ ) {
929         n    = A->i[i+1] - A->i[i];
930 	PLogFlops(4*n+3);
931         idx  = A->j + A->i[i] + shift;
932         v    = A->a + A->i[i] + shift;
933         sum  = b[i];
934         SPARSEDENSEMDOT(sum,xs,v,idx,n);
935         d    = fshift + A->a[diag[i]+shift];
936         n    = B->i[i+1] - B->i[i];
937         idx  = B->j + B->i[i] + shift;
938         v    = B->a + B->i[i] + shift;
939         SPARSEDENSEMDOT(sum,ls,v,idx,n);
940         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
941       }
942     }
943   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
944     if (flag & SOR_ZERO_INITIAL_GUESS) {
945       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
946       ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
947       ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
948       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
949       PetscFunctionReturn(0);
950     }
951     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
952     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
953     while (its--) {
954       for ( i=m-1; i>-1; i-- ) {
955         n    = A->i[i+1] - A->i[i];
956 	PLogFlops(4*n+3);
957         idx  = A->j + A->i[i] + shift;
958         v    = A->a + A->i[i] + shift;
959         sum  = b[i];
960         SPARSEDENSEMDOT(sum,xs,v,idx,n);
961         d    = fshift + A->a[diag[i]+shift];
962         n    = B->i[i+1] - B->i[i];
963         idx  = B->j + B->i[i] + shift;
964         v    = B->a + B->i[i] + shift;
965         SPARSEDENSEMDOT(sum,ls,v,idx,n);
966         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
967       }
968     }
969   } else {
970     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
971   }
972   ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
973   ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
974   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
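/*
   The local relaxation update applied per row i in the sweeps above is, in
   effect,

       x_i <- (1 - omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   where the row sum is split between the diagonal block A (using the local
   part of x) and the off-diagonal block B (using the ghost values in lvec),
   and SPARSEDENSEMDOT() accumulates the subtraction.  Only the SOR_LOCAL_*
   variants are supported; a true parallel SOR is refused above.
*/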
977 
978 #undef __FUNC__
979 #define __FUNC__ "MatGetInfo_MPIAIJ"
980 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
981 {
982   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
983   Mat        A = mat->A, B = mat->B;
984   int        ierr;
985   double     isend[5], irecv[5];
986 
987   PetscFunctionBegin;
988   info->block_size     = 1.0;
989   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
990   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
991   isend[3] = info->memory;  isend[4] = info->mallocs;
992   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
993   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
994   isend[3] += info->memory;  isend[4] += info->mallocs;
995   if (flag == MAT_LOCAL) {
996     info->nz_used      = isend[0];
997     info->nz_allocated = isend[1];
998     info->nz_unneeded  = isend[2];
999     info->memory       = isend[3];
1000     info->mallocs      = isend[4];
1001   } else if (flag == MAT_GLOBAL_MAX) {
1002     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1003     info->nz_used      = irecv[0];
1004     info->nz_allocated = irecv[1];
1005     info->nz_unneeded  = irecv[2];
1006     info->memory       = irecv[3];
1007     info->mallocs      = irecv[4];
1008   } else if (flag == MAT_GLOBAL_SUM) {
1009     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1010     info->nz_used      = irecv[0];
1011     info->nz_allocated = irecv[1];
1012     info->nz_unneeded  = irecv[2];
1013     info->memory       = irecv[3];
1014     info->mallocs      = irecv[4];
1015   }
1016   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1017   info->fill_ratio_needed = 0;
1018   info->factor_mallocs    = 0;
1019   info->rows_global       = (double)mat->M;
1020   info->columns_global    = (double)mat->N;
1021   info->rows_local        = (double)mat->m;
1022   info->columns_local     = (double)mat->N;
1023 
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNC__
1028 #define __FUNC__ "MatSetOption_MPIAIJ"
1029 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1030 {
1031   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1032   int        ierr;
1033 
1034   PetscFunctionBegin;
1035   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1036       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1037       op == MAT_COLUMNS_UNSORTED ||
1038       op == MAT_COLUMNS_SORTED ||
1039       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1040       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1041         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1042         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1043   } else if (op == MAT_ROW_ORIENTED) {
1044     a->roworiented = 1;
1045     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1046     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1047   } else if (op == MAT_ROWS_SORTED ||
1048              op == MAT_ROWS_UNSORTED ||
1049              op == MAT_SYMMETRIC ||
1050              op == MAT_STRUCTURALLY_SYMMETRIC ||
1051              op == MAT_YES_NEW_DIAGONALS)
1052     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1053   else if (op == MAT_COLUMN_ORIENTED) {
1054     a->roworiented = 0;
1055     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1056     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1057   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1058     a->donotstash = 1;
1059   } else if (op == MAT_NO_NEW_DIAGONALS){
1060     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1061   } else {
1062     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1063   }
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNC__
1068 #define __FUNC__ "MatGetSize_MPIAIJ"
1069 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1070 {
1071   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1072 
1073   PetscFunctionBegin;
1074   if (m) *m = mat->M;
1075   if (n) *n = mat->N;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNC__
1080 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1081 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1084 
1085   PetscFunctionBegin;
1086   if (m) *m = mat->m;
1087   if (n) *n = mat->n;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNC__
1092 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1093 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1094 {
1095   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1096 
1097   PetscFunctionBegin;
1098   *m = mat->rstart; *n = mat->rend;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatGetRow_MPIAIJ"
1104 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1105 {
1106   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1107   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1108   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1109   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1110   int        *cmap, *idx_p;
1111 
1112   PetscFunctionBegin;
1113   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1114   mat->getrowactive = PETSC_TRUE;
1115 
1116   if (!mat->rowvalues && (idx || v)) {
1117     /*
1118         allocate enough space to hold information from the longest row.
1119     */
1120     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1121     int     max = 1,m = mat->m,tmp;
1122     for ( i=0; i<m; i++ ) {
1123       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1124       if (max < tmp) { max = tmp; }
1125     }
1126     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1127     mat->rowindices = (int *) (mat->rowvalues + max);
1128   }
1129 
1130   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1131   lrow = row - rstart;
1132 
1133   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1134   if (!v)   {pvA = 0; pvB = 0;}
1135   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1136   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1137   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1138   nztot = nzA + nzB;
1139 
1140   cmap  = mat->garray;
1141   if (v  || idx) {
1142     if (nztot) {
1143       /* Sort by increasing column numbers, assuming A and B already sorted */
1144       int imark = -1;
1145       if (v) {
1146         *v = v_p = mat->rowvalues;
1147         for ( i=0; i<nzB; i++ ) {
1148           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1149           else break;
1150         }
1151         imark = i;
1152         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1153         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1154       }
1155       if (idx) {
1156         *idx = idx_p = mat->rowindices;
1157         if (imark > -1) {
1158           for ( i=0; i<imark; i++ ) {
1159             idx_p[i] = cmap[cworkB[i]];
1160           }
1161         } else {
1162           for ( i=0; i<nzB; i++ ) {
1163             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1164             else break;
1165           }
1166           imark = i;
1167         }
1168         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1169         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1170       }
1171     } else {
1172       if (idx) *idx = 0;
1173       if (v)   *v   = 0;
1174     }
1175   }
1176   *nz = nztot;
1177   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1178   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
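/*
   Caller-side sketch for the row access above (comment only; illustrative).
   Only locally owned rows may be requested, and MatRestoreRow() must be
   called before asking for the next row:

       int    row, nz, *cols, ierr;
       Scalar *vals;

       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(A,row,&nz,&cols,&vals);
         ... use the nz global column numbers in cols and the values in vals ...
         ierr = MatRestoreRow(A,row,&nz,&cols,&vals);
       }

   The returned columns merge the diagonal part (offset by cstart) and the
   off-diagonal part (mapped through garray) in increasing global order.
*/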
1181 
1182 #undef __FUNC__
1183 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1184 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1185 {
1186   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1187 
1188   PetscFunctionBegin;
1189   if (aij->getrowactive == PETSC_FALSE) {
1190     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1191   }
1192   aij->getrowactive = PETSC_FALSE;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNC__
1197 #define __FUNC__ "MatNorm_MPIAIJ"
1198 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1199 {
1200   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1201   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1202   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1203   double     sum = 0.0;
1204   Scalar     *v;
1205 
1206   PetscFunctionBegin;
1207   if (aij->size == 1) {
1208     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1209   } else {
1210     if (type == NORM_FROBENIUS) {
1211       v = amat->a;
1212       for (i=0; i<amat->nz; i++ ) {
1213 #if defined(USE_PETSC_COMPLEX)
1214         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1215 #else
1216         sum += (*v)*(*v); v++;
1217 #endif
1218       }
1219       v = bmat->a;
1220       for (i=0; i<bmat->nz; i++ ) {
1221 #if defined(USE_PETSC_COMPLEX)
1222         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1223 #else
1224         sum += (*v)*(*v); v++;
1225 #endif
1226       }
1227       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1228       *norm = sqrt(*norm);
1229     } else if (type == NORM_1) { /* max column norm */
1230       double *tmp, *tmp2;
1231       int    *jj, *garray = aij->garray;
1232       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
1233       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
1234       PetscMemzero(tmp,aij->N*sizeof(double));
1235       *norm = 0.0;
1236       v = amat->a; jj = amat->j;
1237       for ( j=0; j<amat->nz; j++ ) {
1238         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1239       }
1240       v = bmat->a; jj = bmat->j;
1241       for ( j=0; j<bmat->nz; j++ ) {
1242         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1243       }
1244       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1245       for ( j=0; j<aij->N; j++ ) {
1246         if (tmp2[j] > *norm) *norm = tmp2[j];
1247       }
1248       PetscFree(tmp); PetscFree(tmp2);
1249     } else if (type == NORM_INFINITY) { /* max row norm */
1250       double ntemp = 0.0;
1251       for ( j=0; j<amat->m; j++ ) {
1252         v = amat->a + amat->i[j] + shift;
1253         sum = 0.0;
1254         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1255           sum += PetscAbsScalar(*v); v++;
1256         }
1257         v = bmat->a + bmat->i[j] + shift;
1258         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1259           sum += PetscAbsScalar(*v); v++;
1260         }
1261         if (sum > ntemp) ntemp = sum;
1262       }
1263       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1264     } else {
1265       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1266     }
1267   }
1268   PetscFunctionReturn(0);
1269 }
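/*
   Norms computed above (descriptive note): the Frobenius norm sums |a_ij|^2
   over the local A and B blocks and takes the square root of the global
   MPI_SUM; the 1-norm builds per-column sums of |a_ij| (off-diagonal columns
   mapped through garray), reduces them with MPI_SUM and takes the largest;
   the infinity norm builds per-row sums locally and reduces with MPI_MAX.
   The 2-norm is not supported for parallel AIJ matrices.
*/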
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ "MatTranspose_MPIAIJ"
1273 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1274 {
1275   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1276   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1277   int        ierr,shift = Aloc->indexshift;
1278   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1279   Mat        B;
1280   Scalar     *array;
1281 
1282   PetscFunctionBegin;
1283   if (matout == PETSC_NULL && M != N) {
1284     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1285   }
1286 
1287   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1288 
1289   /* copy over the A part */
1290   Aloc = (Mat_SeqAIJ*) a->A->data;
1291   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1292   row = a->rstart;
1293   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1294   for ( i=0; i<m; i++ ) {
1295     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1296     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1297   }
1298   aj = Aloc->j;
1299   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1300 
1301   /* copy over the B part */
1302   Aloc = (Mat_SeqAIJ*) a->B->data;
1303   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1304   row = a->rstart;
1305   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1306   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1307   for ( i=0; i<m; i++ ) {
1308     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1309     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1310   }
1311   PetscFree(ct);
1312   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1313   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1314   if (matout != PETSC_NULL) {
1315     *matout = B;
1316   } else {
1317     PetscOps       *Abops;
1318     struct _MatOps *Aops;
1319 
1320     /* This isn't really an in-place transpose, but we do free the data structures of a */
1321     PetscFree(a->rowners);
1322     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1323     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1324 #if defined (USE_CTABLE)
1325     if (a->colmap) TableDelete(a->colmap);
1326 #else
1327     if (a->colmap) PetscFree(a->colmap);
1328 #endif
1329     if (a->garray) PetscFree(a->garray);
1330     if (a->lvec) VecDestroy(a->lvec);
1331     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1332     PetscFree(a);
1333 
1334     /*
1335        This is horrible, horrible code. We need to keep the
1336       A pointers for the bops and ops but copy everything
1337       else from B.
1338     */
1339     Abops = A->bops;
1340     Aops  = A->ops;
1341     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1342     A->bops = Abops;
1343     A->ops  = Aops;
1344     PetscHeaderDestroy(B);
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNC__
1350 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1351 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1352 {
1353   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1354   Mat a = aij->A, b = aij->B;
1355   int ierr,s1,s2,s3;
1356 
1357   PetscFunctionBegin;
1358   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1359   if (rr) {
1360     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1361     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1362     /* Overlap communication with computation. */
1363     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1364   }
1365   if (ll) {
1366     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1367     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1368     ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
1369   }
1370   /* scale  the diagonal block */
1371   ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1372 
1373   if (rr) {
1374     /* Do a scatter end and then right scale the off-diagonal block */
1375     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
1376     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1377   }
1378 
1379   PetscFunctionReturn(0);
1380 }
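/*
   The scaling above computes, in effect, mat <- diag(ll) * mat * diag(rr).
   Right-scaling the off-diagonal block needs the ghost entries of rr, so the
   scatter of rr into lvec is started first, the diagonal block is scaled
   while it is in flight, and the off-diagonal block is right-scaled after
   VecScatterEnd().  Left-scaling uses only local entries of ll.
*/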
1381 
1382 
1383 #undef __FUNC__
1384 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1385 int MatPrintHelp_MPIAIJ(Mat A)
1386 {
1387   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1388   int        ierr;
1389 
1390   PetscFunctionBegin;
1391   if (!a->rank) {
1392     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNC__
1398 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1399 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1400 {
1401   PetscFunctionBegin;
1402   *bs = 1;
1403   PetscFunctionReturn(0);
1404 }
1405 #undef __FUNC__
1406 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1407 int MatSetUnfactored_MPIAIJ(Mat A)
1408 {
1409   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1410   int        ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNC__
1418 #define __FUNC__ "MatEqual_MPIAIJ"
1419 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1420 {
1421   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1422   Mat        a, b, c, d;
1423   PetscTruth flg;
1424   int        ierr;
1425 
1426   PetscFunctionBegin;
1427   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1428   a = matA->A; b = matA->B;
1429   c = matB->A; d = matB->B;
1430 
1431   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1432   if (flg == PETSC_TRUE) {
1433     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1434   }
1435   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNC__
1440 #define __FUNC__ "MatCopy_MPIAIJ"
1441 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1442 {
1443   int        ierr;
1444   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1445   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1446 
1447   PetscFunctionBegin;
1448   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1449     /* because of the column compression in the off-processor part of the matrix a->B,
1450        the number of columns in a->B and b->B may be different, hence we cannot call
1451        MatCopy() directly on the two parts. If need be, we can provide a more
1452        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1453        and then copying the submatrices */
1454     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1455   } else {
1456     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1457     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1458   }
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1463 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1464 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1465 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1466 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1467 
1468 /* -------------------------------------------------------------------*/
1469 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1470        MatGetRow_MPIAIJ,
1471        MatRestoreRow_MPIAIJ,
1472        MatMult_MPIAIJ,
1473        MatMultAdd_MPIAIJ,
1474        MatMultTrans_MPIAIJ,
1475        MatMultTransAdd_MPIAIJ,
1476        0,
1477        0,
1478        0,
1479        0,
1480        0,
1481        0,
1482        MatRelax_MPIAIJ,
1483        MatTranspose_MPIAIJ,
1484        MatGetInfo_MPIAIJ,
1485        MatEqual_MPIAIJ,
1486        MatGetDiagonal_MPIAIJ,
1487        MatDiagonalScale_MPIAIJ,
1488        MatNorm_MPIAIJ,
1489        MatAssemblyBegin_MPIAIJ,
1490        MatAssemblyEnd_MPIAIJ,
1491        0,
1492        MatSetOption_MPIAIJ,
1493        MatZeroEntries_MPIAIJ,
1494        MatZeroRows_MPIAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499        MatGetSize_MPIAIJ,
1500        MatGetLocalSize_MPIAIJ,
1501        MatGetOwnershipRange_MPIAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506        MatDuplicate_MPIAIJ,
1507        0,
1508        0,
1509        0,
1510        0,
1511        0,
1512        MatGetSubMatrices_MPIAIJ,
1513        MatIncreaseOverlap_MPIAIJ,
1514        MatGetValues_MPIAIJ,
1515        MatCopy_MPIAIJ,
1516        MatPrintHelp_MPIAIJ,
1517        MatScale_MPIAIJ,
1518        0,
1519        0,
1520        0,
1521        MatGetBlockSize_MPIAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526        MatFDColoringCreate_MPIAIJ,
1527        0,
1528        MatSetUnfactored_MPIAIJ,
1529        0,
1530        0,
1531        MatGetSubMatrix_MPIAIJ,
1532        0,
1533        0,
1534        MatGetMaps_Petsc};
1535 
1536 /* ----------------------------------------------------------------------------------------*/
1537 
1538 EXTERN_C_BEGIN
1539 #undef __FUNC__
1540 #define __FUNC__ "MatStoreValues_MPIAIJ"
1541 int MatStoreValues_MPIAIJ(Mat mat)
1542 {
1543   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1544   int        ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1548   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1549   PetscFunctionReturn(0);
1550 }
1551 EXTERN_C_END
1552 
1553 EXTERN_C_BEGIN
1554 #undef __FUNC__
1555 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1556 int MatRetrieveValues_MPIAIJ(Mat mat)
1557 {
1558   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1559   int        ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1563   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 EXTERN_C_END
1567 
1568 #include "pc.h"
1569 EXTERN_C_BEGIN
1570 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1571 EXTERN_C_END
1572 
1573 #undef __FUNC__
1574 #define __FUNC__ "MatCreateMPIAIJ"
1575 /*@C
1576    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1577    (the default parallel PETSc format).  For good matrix assembly performance
1578    the user should preallocate the matrix storage by setting the parameters
1579    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1580    performance can be increased by more than a factor of 50.
1581 
1582    Collective on MPI_Comm
1583 
1584    Input Parameters:
1585 +  comm - MPI communicator
1586 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1587            This value should be the same as the local size used in creating the
1588            y vector for the matrix-vector product y = Ax.
1589 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1590        This value should be the same as the local size used in creating the
1591        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1592 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1593 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1594 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1595            (same value is used for all local rows)
1596 .  d_nnz - array containing the number of nonzeros in the various rows of the
1597            DIAGONAL portion of the local submatrix (possibly different for each row)
1598            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1599            The size of this array is equal to the number of local rows, i.e., 'm'.
1600            You must leave room for the diagonal entry even if it is zero.
1601 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1602            submatrix (same value is used for all local rows).
1603 -  o_nnz - array containing the number of nonzeros in the various rows of the
1604            OFF-DIAGONAL portion of the local submatrix (possibly different for
1605            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1606            structure. The size of this array is equal to the number
1607            of local rows, i.e., 'm'.
1608 
1609    Output Parameter:
1610 .  A - the matrix
1611 
1612    Notes:
1613    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1614    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1615    storage requirements for this matrix.
1616 
1617    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1618    processor then it must be used on all processors that share the object for
1619    that argument.
1620 
1621    The AIJ format (also called the Yale sparse matrix format or
1622    compressed row storage) is fully compatible with standard Fortran 77
1623    storage.  That is, the stored row and column indices can begin at
1624    either one (as in Fortran) or zero.  See the users manual for details.
1625 
1626    The user MUST specify either the local or global matrix dimensions
1627    (possibly both).
1628 
1629    The parallel matrix is partitioned such that the first m0 rows belong to
1630    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1631    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
1632 
1633    The DIAGONAL portion of the local submatrix of a processor can be defined
1634    as the submatrix which is obtained by extracting the part corresponding
1635    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1636    first row that belongs to the processor, and r2 is the last row belonging
1637    to this processor. This is a square mxm matrix. The remaining portion
1638    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1639 
1640    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1641    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
1642    By default, this format uses inodes (identical nodes) when possible.
1643    We search for consecutive rows with the same nonzero structure, thereby
1644    reusing matrix information to achieve increased efficiency.
1645 
1646    Options Database Keys:
1647 +  -mat_aij_no_inode  - Do not use inodes
1648 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1649 .  -mat_aij_oneindex - Internally use indexing starting at 1
1650         rather than 0.  Note that when calling MatSetValues(),
1651         the user still MUST index entries starting at 0!
1652 -   -mat_mpi - use the parallel matrix data structures even on one processor
1653                (defaults to using SeqAIJ format on one processor)
1656 
1657 
1658    Example usage:
1659 
1660    Consider the following 8x8 matrix with 34 nonzero values that is
1661    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1662    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1663    as follows:
1664 
1665 .vb
1666             1  2  0  |  0  3  0  |  0  4
1667     Proc0   0  5  6  |  7  0  0  |  8  0
1668             9  0 10  | 11  0  0  | 12  0
1669     -------------------------------------
1670            13  0 14  | 15 16 17  |  0  0
1671     Proc1   0 18  0  | 19 20 21  |  0  0
1672             0  0  0  | 22 23  0  | 24  0
1673     -------------------------------------
1674     Proc2  25 26 27  |  0  0 28  | 29  0
1675            30  0  0  | 31 32 33  |  0 34
1676 .ve
1677 
1678    This can be represented as a collection of submatrices as:
1679 
1680 .vb
1681       A B C
1682       D E F
1683       G H I
1684 .ve
1685 
1686    Here the submatrices A,B,C are owned by proc0, D,E,F are
1687    owned by proc1, G,H,I are owned by proc2.
1688 
1689    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1690    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1691    The 'M','N' parameters are 8,8, and have the same values on all procs.
1692 
1693    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1694    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1695    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1696    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1697    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1698    matrix, and [DF] as another SeqAIJ matrix.
1699 
1700    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1701    allocated for every row of the local DIAGONAL submatrix, and o_nz
1702    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1703    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1704    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
1705    In this case, the values of d_nz,o_nz are:
1706 .vb
1707      proc0 : d_nz = 2, o_nz = 2
1708      proc1 : d_nz = 3, o_nz = 2
1709      proc2 : d_nz = 1, o_nz = 4
1710 .ve
1711    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
1712    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
1713    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1714    34 values.
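
   For instance, the call issued on proc0 in this example might look as follows
   (an illustrative sketch only; the communicator used and the omitted error
   checking are assumptions, not part of the example above):
.vb
     Mat A;
     int ierr;
     /* proc0 of the example: m=3, n=3, M=8, N=8, d_nz=2, o_nz=2 */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);
.ve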
1715 
1716    When d_nnz, o_nnz parameters are specified, the storage is specified
1717    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1718    In the above case the values for d_nnz,o_nnz are:
1719 .vb
1720      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1721      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1722      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1723 .ve
1724    Here the space allocated is the sum of all the above values, i.e., 34, and
1725    hence the preallocation is perfect.
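
   With exact per-row counts such as these, a corresponding sketch of the call on
   proc1 might be (again illustrative only; the communicator and the omitted error
   checking are assumptions):
.vb
     Mat A;
     int ierr;
     int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* the proc1 values listed above */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve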
1726 
1727    Level: intermediate
1728 
1729 .keywords: matrix, aij, compressed row, sparse, parallel
1730 
1731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1732 @*/
1733 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1734 {
1735   Mat          B;
1736   Mat_MPIAIJ   *b;
1737   int          ierr, i,size,flag1 = 0, flag2 = 0;
1738 
1739   PetscFunctionBegin;
1740   MPI_Comm_size(comm,&size);
1741   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
1742   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1743   if (!flag1 && !flag2 && size == 1) {
1744     if (M == PETSC_DECIDE) M = m;
1745     if (N == PETSC_DECIDE) N = n;
1746     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1747     PetscFunctionReturn(0);
1748   }
1749 
1750   *A = 0;
1751   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1752   PLogObjectCreate(B);
1753   B->data            = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1754   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1755   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1756   B->ops->destroy    = MatDestroy_MPIAIJ;
1757   B->ops->view       = MatView_MPIAIJ;
1758   B->factor          = 0;
1759   B->assembled       = PETSC_FALSE;
1760   B->mapping         = 0;
1761 
1762   B->insertmode      = NOT_SET_VALUES;
1763   b->size            = size;
1764   MPI_Comm_rank(comm,&b->rank);
1765 
1766   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1767     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1768   }
1769 
1770   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1771   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1772   b->m = m; B->m = m;
1773   b->n = n; B->n = n;
1774   b->N = N; B->N = N;
1775   b->M = M; B->M = M;
1776 
1777   /* the information in the maps duplicates the information computed below, eventually
1778      we should remove the duplicate information that is not contained in the maps */
1779   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1780   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1781 
1782   /* build local table of row and column ownerships */
1783   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1784   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1785   b->cowners = b->rowners + b->size + 2;
1786   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1787   b->rowners[0] = 0;
1788   for ( i=2; i<=b->size; i++ ) {
1789     b->rowners[i] += b->rowners[i-1];
1790   }
1791   b->rstart = b->rowners[b->rank];
1792   b->rend   = b->rowners[b->rank+1];
1793   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1794   b->cowners[0] = 0;
1795   for ( i=2; i<=b->size; i++ ) {
1796     b->cowners[i] += b->cowners[i-1];
1797   }
1798   b->cstart = b->cowners[b->rank];
1799   b->cend   = b->cowners[b->rank+1];
1800 
1801   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1802   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1803   PLogObjectParent(B,b->A);
1804   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1805   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1806   PLogObjectParent(B,b->B);
1807 
1808   /* build cache for off-processor entries formed during assembly */
1809   ierr = StashCreate_Private(comm,1,1,&b->stash); CHKERRQ(ierr);
1810   b->donotstash  = 0;
1811   b->colmap      = 0;
1812   b->garray      = 0;
1813   b->roworiented = 1;
1814 
1815   /* stuff used for matrix vector multiply */
1816   b->lvec      = 0;
1817   b->Mvctx     = 0;
1818 
1819   /* stuff for MatGetRow() */
1820   b->rowindices   = 0;
1821   b->rowvalues    = 0;
1822   b->getrowactive = PETSC_FALSE;
1823 
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1825                                      "MatStoreValues_MPIAIJ",
1826                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1828                                      "MatRetrieveValues_MPIAIJ",
1829                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1831                                      "MatGetDiagonalBlock_MPIAIJ",
1832                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1833   *A = B;
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNC__
1838 #define __FUNC__ "MatDuplicate_MPIAIJ"
1839 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1840 {
1841   Mat        mat;
1842   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1843   int        ierr, len=0, flg;
1844 
1845   PetscFunctionBegin;
1846   *newmat       = 0;
1847   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1848   PLogObjectCreate(mat);
1849   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1850   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1851   mat->ops->destroy = MatDestroy_MPIAIJ;
1852   mat->ops->view    = MatView_MPIAIJ;
1853   mat->factor       = matin->factor;
1854   mat->assembled    = PETSC_TRUE;
1855   mat->insertmode   = NOT_SET_VALUES;
1856 
1857   a->m = mat->m   = oldmat->m;
1858   a->n = mat->n   = oldmat->n;
1859   a->M = mat->M   = oldmat->M;
1860   a->N = mat->N   = oldmat->N;
1861 
1862   a->rstart       = oldmat->rstart;
1863   a->rend         = oldmat->rend;
1864   a->cstart       = oldmat->cstart;
1865   a->cend         = oldmat->cend;
1866   a->size         = oldmat->size;
1867   a->rank         = oldmat->rank;
1868   a->donotstash   = oldmat->donotstash;
1869   a->roworiented  = oldmat->roworiented;
1870   a->rowindices   = 0;
1871   a->rowvalues    = 0;
1872   a->getrowactive = PETSC_FALSE;
1873 
1874   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1875   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1876   a->cowners = a->rowners + a->size + 2;
1877   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1878   ierr = StashCreate_Private(matin->comm,1,1,&a->stash); CHKERRQ(ierr);
1879   if (oldmat->colmap) {
1880 #if defined (USE_CTABLE)
1881     ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
1882 #else
1883     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1884     PLogObjectMemory(mat,(a->N)*sizeof(int));
1885     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1886 #endif
1887   } else a->colmap = 0;
1888   if (oldmat->garray) {
1889     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1890     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1891     PLogObjectMemory(mat,len*sizeof(int));
1892     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1893   } else a->garray = 0;
1894 
1895   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1896   PLogObjectParent(mat,a->lvec);
1897   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1898   PLogObjectParent(mat,a->Mvctx);
1899   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
1900   PLogObjectParent(mat,a->A);
1901   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
1902   PLogObjectParent(mat,a->B);
1903   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1904   if (flg) {
1905     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1906   }
1907   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1908   *newmat = mat;
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #include "sys.h"
1913 
1914 #undef __FUNC__
1915 #define __FUNC__ "MatLoad_MPIAIJ"
1916 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1917 {
1918   Mat          A;
1919   Scalar       *vals,*svals;
1920   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1921   MPI_Status   status;
1922   int          i, nz, ierr, j,rstart, rend, fd;
1923   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1924   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1925   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1926 
1927   PetscFunctionBegin;
1928   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1929   if (!rank) {
1930     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1931     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1932     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1933     if (header[3] < 0) {
1934       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1935     }
1936   }
1937 
1938   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1939   M = header[1]; N = header[2];
1940   /* determine ownership of all rows */
1941   m = M/size + ((M % size) > rank);
1942   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1943   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1944   rowners[0] = 0;
1945   for ( i=2; i<=size; i++ ) {
1946     rowners[i] += rowners[i-1];
1947   }
1948   rstart = rowners[rank];
1949   rend   = rowners[rank+1];
1950 
1951   /* distribute row lengths to all processors */
1952   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1953   offlens = ourlens + (rend-rstart);
1954   if (!rank) {
1955     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1956     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1957     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1958     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1959     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1960     PetscFree(sndcounts);
1961   } else {
1962     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1963   }
1964 
1965   if (!rank) {
1966     /* calculate the number of nonzeros on each processor */
1967     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1968     PetscMemzero(procsnz,size*sizeof(int));
1969     for ( i=0; i<size; i++ ) {
1970       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1971         procsnz[i] += rowlengths[j];
1972       }
1973     }
1974     PetscFree(rowlengths);
1975 
1976     /* determine max buffer needed and allocate it */
1977     maxnz = 0;
1978     for ( i=0; i<size; i++ ) {
1979       maxnz = PetscMax(maxnz,procsnz[i]);
1980     }
1981     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1982 
1983     /* read in my part of the matrix column indices  */
1984     nz     = procsnz[0];
1985     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1986     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1987 
1988     /* read in everyone else's and ship them off */
1989     for ( i=1; i<size; i++ ) {
1990       nz   = procsnz[i];
1991       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1992       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1993     }
1994     PetscFree(cols);
1995   } else {
1996     /* determine buffer space needed for message */
1997     nz = 0;
1998     for ( i=0; i<m; i++ ) {
1999       nz += ourlens[i];
2000     }
2001     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
2002 
2003     /* receive message of column indices*/
2004     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2005     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2006     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2007   }
2008 
2009   /* determine column ownership if matrix is not square */
2010   if (N != M) {
2011     n      = N/size + ((N % size) > rank);
2012     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2013     cstart = cend - n;
2014   } else {
2015     cstart = rstart;
2016     cend   = rend;
2017     n      = cend - cstart;
2018   }
2019 
2020   /* loop over local rows, determining number of off diagonal entries */
2021   PetscMemzero(offlens,m*sizeof(int));
2022   jj = 0;
2023   for ( i=0; i<m; i++ ) {
2024     for ( j=0; j<ourlens[i]; j++ ) {
2025       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2026       jj++;
2027     }
2028   }
2029 
2030   /* create our matrix */
2031   for ( i=0; i<m; i++ ) {
2032     ourlens[i] -= offlens[i];
2033   }
2034   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2035   A = *newmat;
2036   ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
2037   for ( i=0; i<m; i++ ) {
2038     ourlens[i] += offlens[i];
2039   }
2040 
2041   if (!rank) {
2042     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
2043 
2044     /* read in my part of the matrix numerical values  */
2045     nz   = procsnz[0];
2046     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2047 
2048     /* insert into matrix */
2049     jj      = rstart;
2050     smycols = mycols;
2051     svals   = vals;
2052     for ( i=0; i<m; i++ ) {
2053       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2054       smycols += ourlens[i];
2055       svals   += ourlens[i];
2056       jj++;
2057     }
2058 
2059     /* read in other processors' values and ship them out */
2060     for ( i=1; i<size; i++ ) {
2061       nz   = procsnz[i];
2062       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2063       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2064     }
2065     PetscFree(procsnz);
2066   } else {
2067     /* receive numeric values */
2068     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
2069 
2070     /* receive message of values*/
2071     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2072     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2073     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2074 
2075     /* insert into matrix */
2076     jj      = rstart;
2077     smycols = mycols;
2078     svals   = vals;
2079     for ( i=0; i<m; i++ ) {
2080       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2081       smycols += ourlens[i];
2082       svals   += ourlens[i];
2083       jj++;
2084     }
2085   }
2086   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2087 
2088   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2089   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #undef __FUNC__
2094 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2095 /*
2096     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2097   on each process, and then the end result formed by concatenating the local
2098   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2099 */
2100 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2101 {
2102   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2103   Mat        *local,M, Mreuse;
2104   Scalar     *vwork,*aa;
2105   MPI_Comm   comm = mat->comm;
2106   Mat_SeqAIJ *aij;
2107   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2108 
2109   PetscFunctionBegin;
2110   MPI_Comm_rank(comm,&rank);
2111   MPI_Comm_size(comm,&size);
2112 
2113   if (call ==  MAT_REUSE_MATRIX) {
2114     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2115     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2116     local = &Mreuse;
2117     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2118   } else {
2119     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2120     Mreuse = *local;
2121     PetscFree(local);
2122   }
2123 
2124   /*
2125       m - number of local rows
2126       n - number of columns (same on all processors)
2127       rstart - first row in new global matrix generated
2128   */
2129   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2130   if (call == MAT_INITIAL_MATRIX) {
2131     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2132     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2133     ii  = aij->i;
2134     jj  = aij->j;
2135 
2136     /*
2137         Determine the number of non-zeros in the diagonal and off-diagonal
2138         portions of the matrix in order to do correct preallocation
2139     */
2140 
2141     /* first get start and end of "diagonal" columns */
2142     if (csize == PETSC_DECIDE) {
2143       nlocal = n/size + ((n % size) > rank);
2144     } else {
2145       nlocal = csize;
2146     }
2147     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2148     rstart = rend - nlocal;
2149     if (rank == size - 1 && rend != n) {
2150       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2151     }
2152 
2153     /* next, compute all the lengths */
2154     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2155     olens = dlens + m;
2156     for ( i=0; i<m; i++ ) {
2157       jend = ii[i+1] - ii[i];
2158       olen = 0;
2159       dlen = 0;
2160       for ( j=0; j<jend; j++ ) {
2161         if ( *jj < rstart || *jj >= rend) olen++;
2162         else dlen++;
2163         jj++;
2164       }
2165       olens[i] = olen;
2166       dlens[i] = dlen;
2167     }
2168     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2169     PetscFree(dlens);
2170   } else {
2171     int ml,nl;
2172 
2173     M = *newmat;
2174     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2175     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2176     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2177     /*
2178          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2179        rather than the slower MatSetValues().
2180     */
2181     M->was_assembled = PETSC_TRUE;
2182     M->assembled     = PETSC_FALSE;
2183   }
2184   ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
2185   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2186   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2187   ii  = aij->i;
2188   jj  = aij->j;
2189   aa  = aij->a;
2190   for (i=0; i<m; i++) {
2191     row   = rstart + i;
2192     nz    = ii[i+1] - ii[i];
2193     cwork = jj;     jj += nz;
2194     vwork = aa;     aa += nz;
2195     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
2196   }
2197 
2198   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2199   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2200   *newmat = M;
2201 
2202   /* save submatrix used in processor for next request */
2203   if (call ==  MAT_INITIAL_MATRIX) {
2204     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2205     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2206   }
2207 
2208   PetscFunctionReturn(0);
2209 }
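
/*
   A hedged caller-side sketch of the create/reuse pattern implemented above; it
   assumes the caller goes through the public MatGetSubMatrix() interface and
   already owns the parallel matrix mat and the index sets isrow and iscol
   (their creation is not shown).
*/
#if 0
static int MatGetSubMatrixUsageSketch(Mat mat,IS isrow,IS iscol)
{
  Mat sub;
  int ierr;

  PetscFunctionBegin;
  /* first call allocates the submatrix and composes the sequential copy ("SubMatrix") on it */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* subsequent calls with MAT_REUSE_MATRIX refill the same submatrix without reallocating */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif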
2210