xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 70e7bcd7a2cbf38404db10649f00b67a1197865e)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.299 1999/09/14 17:38:39 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   CreateColmap_MPIAIJ_Private - builds aij->colmap, which maps a global
   column number to 1 + the local column number in the off-diagonal block
   aij->B (0 means "not present in B").

   Two storage schemes:
     PETSC_USE_CTABLE - a hash table keyed by global column + 1 (the +1 on
                        both key and value keeps 0 free as "not found")
     otherwise        - a dense int array of length aij->N (global number
                        of columns), which is simple but not scalable.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;  /* n = number of (local) columns of B */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* aij->n/5 is only an initial size hint for the table */
  ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    /* garray[i] is the global column of local B column i */
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
43 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - macro that inserts a
   single entry into the "diagonal" sequential block A of an MPIAIJ matrix.

   It is expanded only inside MatSetValues_MPIAIJ() and relies on variables
   declared there: rp,ap,rmax,nrow,_i,ii,N,col1,low,high,t,nonew,shift,ierr
   plus a (the Mat_SeqAIJ) and its cached arrays aa,ai,aj,aimax,ailen.

   addv == ADD_VALUES accumulates into an existing entry; any other mode
   overwrites.  A missing entry is inserted (subject to a->nonew); when the
   row has no free slot, the whole matrix storage is reallocated with
   CHUNKSIZE extra slots for this row.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    /* rp/ap -> column indices / values of this row; shift handles the */ \
    /* optional Fortran-style 1-based indexing of the stored arrays    */ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    /* NOTE(review): this bisection narrows low/high but the scan below  */ \
    /* restarts at _i=0, so its result is unused (wasted work, harmless);*/ \
    /* it also compares rp[t] with col rather than col1 - confirm intent */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          /* entry already present: accumulate or overwrite */ \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      /* nonew==1: silently drop new nonzeros; nonew==-1: error on them */ \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds a, j and i */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past this one shift by CHUNKSIZE */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
           ierr = PetscFree(a->i);CHKERRQ(ierr); \
           ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        /* recompute row pointers: the arrays moved */ \
        rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
        rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow; \
}
118 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv) - macro that inserts a
   single entry into the "off-diagonal" sequential block B of an MPIAIJ
   matrix.  Structurally identical to MatSetValues_SeqAIJ_A_Private but
   operating on b,ba,bi,bj,bimax,bilen and jumping to b_noinsert.

   Expanded only inside MatSetValues_MPIAIJ(), which declares all the
   scratch variables used here (rp,ap,rmax,nrow,_i,ii,N,col1,low,high,t,
   nonew,shift,ierr).  NOTE(review): nonew/shift come from block A's
   Mat_SeqAIJ, not from b - confirm the two blocks always agree.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    /* rp/ap -> column indices / values of this row of B */ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    /* NOTE(review): as in the A macro, this bisection's result is unused */ \
    /* (the scan below restarts at _i=0) and compares with col, not col1 */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
       for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          /* entry already present: accumulate or overwrite */ \
          if (addv == ADD_VALUES) ap[_i] += value;   \
          else                  ap[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      /* nonew==1: silently drop new nonzeros; nonew==-1: error on them */ \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds a, j and i */ \
        len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past this one shift by CHUNKSIZE */ \
        for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
        ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
 \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        /* recompute row pointers: the arrays moved */ \
        rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
        rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ap[ii+1] = ap[ii]; \
      } \
      rp[_i] = col1;  \
      ap[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow; \
}
192 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - inserts or adds an m x n logically dense block of
   values v (row or column oriented per aij->roworiented) into the matrix.

   Locally owned rows are split column-wise between the diagonal block
   aij->A and the off-diagonal block aij->B and inserted in place via the
   MatSetValues_SeqAIJ_{A,B}_Private macros; rows owned by other processors
   are stashed and communicated during assembly (unless aij->donotstash).
   Negative row or column indices are silently skipped.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  /* scratch variables used inside the insertion macros */
  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  /* NOTE(review): nonew/shift come from the diagonal block A but also
     govern insertions into B below - confirm both blocks always agree */
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row owned by this processor */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column lies in the off-diagonal block B; after first assembly
             it must be translated to B's compact local numbering */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* brand-new off-diagonal column: revert to global column
                 numbering and disassemble so B can be rebuilt */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash it for communication at assembly time */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
275 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - retrieves an m x n block of values into v
   (row-major).  Only rows owned by this processor may be requested;
   off-diagonal columns are looked up through aij->colmap (built lazily)
   and entries not stored in B are returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block A */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block B: translate through colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale/aliased colmap hit */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
318 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - begins shipping stashed (off-processor) entries
   to their owning processors; the receives are drained in
   MatAssemblyEnd_MPIAIJ().  Collective on mat->comm.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    /* NOTE(review): this early return skips the Allreduce below, which is
       only safe if donotstash is set identically on all processors - confirm */
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* OR-ing the modes across processors yields ADD_VALUES|INSERT_VALUES
     exactly when different processors used different modes */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
344 
345 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - receives stashed entries begun in
   MatAssemblyBegin_MPIAIJ(), inserts them locally, assembles the two
   sequential blocks, and (if needed) disassembles/rebuilds the parallel
   scatter structures.  Collective on mat->comm.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* drain incoming messages until the stash says there are no more */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        /* (rstart is reused here as a row number, not the ownership offset) */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags is 0 iff at least one processor
       disassembled (its flag is 0) */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the column map and scatter context for B */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* any cached MatGetRow() workspace is invalid after assembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
455   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
456   nrecvs = work[rank];
457   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
458   nmax = work[rank];
459   ierr = PetscFree(work);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
463   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
464   for ( i=0; i<nrecvs; i++ ) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
473   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
474   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
475   starts[0] = 0;
476   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   for ( i=0; i<N; i++ ) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ISRestoreIndices(is,&rows);
481 
482   starts[0] = 0;
483   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
484   count = 0;
485   for ( i=0; i<size; i++ ) {
486     if (procs[i]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]  = n;
504     slen += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
511   count = 0;
512   for ( i=0; i<nrecvs; i++ ) {
513     values = rvalues + i*nmax;
514     for ( j=0; j<lens[i]; j++ ) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use seperate
530      code so that MatSetValues() is not called for each diagonal allocating
531      new memory, thus calling lots of mallocs and slowing things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for ( i = 0; i < slen; i++ ) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
560     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNC__
570 #define __FUNC__ "MatMult_MPIAIJ"
571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr,nt;
575 
576   PetscFunctionBegin;
577   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
578   if (nt != a->n) {
579     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
580   }
581   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
582   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
583   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatMultAdd_MPIAIJ"
590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
591 {
592   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
593   int        ierr;
594 
595   PetscFunctionBegin;
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatMultTrans_MPIAIJ"
605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   /* do nondiagonal part */
612   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
613   /* send it on its way */
614   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
615   /* do local part */
616   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
617   /* receive remote parts: note this assumes the values are not actually */
618   /* inserted in yy until the next line, which is true for my implementation*/
619   /* but is not perhaps always true. */
620   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
629   int        ierr;
630 
631   PetscFunctionBegin;
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   /* send it on its way */
635   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   /* do local part */
637   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
638   /* receive remote parts: note this assumes the values are not actually */
639   /* inserted in yy until the next line, which is true for my implementation*/
640   /* but is not perhaps always true. */
641   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646   This only works correctly for square matrices where the subblock A->A is the
647    diagonal block
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
652 {
653   int        ierr;
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655 
656   PetscFunctionBegin;
657   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
658   if (a->rstart != a->cstart || a->rend != a->cend) {
659     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
660   }
661   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatScale_MPIAIJ"
667 int MatScale_MPIAIJ(Scalar *aa,Mat A)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   PetscFunctionBegin;
673   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
674   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNC__
679 #define __FUNC__ "MatDestroy_MPIAIJ"
680 int MatDestroy_MPIAIJ(Mat mat)
681 {
682   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686 
687   if (mat->mapping) {
688     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
689   }
690   if (mat->bmapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
692   }
693   if (mat->rmap) {
694     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
695   }
696   if (mat->cmap) {
697     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_USE_LOG)
700   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
701 #endif
702   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
703   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
706 #if defined (PETSC_USE_CTABLE)
707   if (aij->colmap) TableDelete(aij->colmap);
708 #else
709   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
710 #endif
711   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
712   if (aij->lvec)   VecDestroy(aij->lvec);
713   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
714   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
715   ierr = PetscFree(aij);CHKERRQ(ierr);
716   PLogObjectDestroy(mat);
717   PetscHeaderDestroy(mat);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatView_MPIAIJ_Binary"
723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
724 {
725   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
726   int         ierr;
727 
728   PetscFunctionBegin;
729   if (aij->size == 1) {
730     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
731   }
732   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
733   PetscFunctionReturn(0);
734 }
735 
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - views the matrix on an ASCII, draw
   or socket viewer.  Info formats print per-processor statistics; all
   other formats assemble a copy of the entire matrix onto processor 0
   (expensive in memory) and view that copy.
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank ;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* print per-processor nonzero statistics, one processor at a time */
      MatInfo info;
      int     flg;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    /* processor 0 allocates the full M x N matrix; others contribute 0 rows */
    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert A's local column numbers to global numbers
       in place; undone after the MatSetValues loop below */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* B's columns are translated to global numbers through garray into a
       scratch array (ct keeps the base pointer for the later free) */
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
833 
834 #undef __FUNC__
835 #define __FUNC__ "MatView_MPIAIJ"
836 int MatView_MPIAIJ(Mat mat,Viewer viewer)
837 {
838   int         ierr;
839   ViewerType  vtype;
840 
841   PetscFunctionBegin;
842   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
843   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
844       PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
845     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
846     /*
847   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
848     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
849     */
850   } else {
851     SETERRQ(1,1,"Viewer type not supported by PETSc object");
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 /*
857     This has to provide several versions.
858 
859      2) a) use only local smoothing updating outer values only once.
860         b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
862 */
863 #undef __FUNC__
864 #define __FUNC__ "MatRelax_MPIAIJ"
865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
866                            double fshift,int its,Vec xx)
867 {
868   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
869   Mat        AA = mat->A, BB = mat->B;
870   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
871   Scalar     *b,*x,*xs,*ls,d,*v,sum;
872   int        ierr,*idx, *diag;
873   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
874 
875   PetscFunctionBegin;
876   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
877   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
878   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
879   xs = x + shift; /* shift by one for index start of 1 */
880   ls = ls + shift;
881   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
882   diag = A->diag;
883   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
884     if (flag & SOR_ZERO_INITIAL_GUESS) {
885       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
886       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
888       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
889       PetscFunctionReturn(0);
890     }
891     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
892     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
893     while (its--) {
894       /* go down through the rows */
895       for ( i=0; i<m; i++ ) {
896         n    = A->i[i+1] - A->i[i];
897 	PLogFlops(4*n+3);
898         idx  = A->j + A->i[i] + shift;
899         v    = A->a + A->i[i] + shift;
900         sum  = b[i];
901         SPARSEDENSEMDOT(sum,xs,v,idx,n);
902         d    = fshift + A->a[diag[i]+shift];
903         n    = B->i[i+1] - B->i[i];
904         idx  = B->j + B->i[i] + shift;
905         v    = B->a + B->i[i] + shift;
906         SPARSEDENSEMDOT(sum,ls,v,idx,n);
907         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
908       }
909       /* come up through the rows */
910       for ( i=m-1; i>-1; i-- ) {
911         n    = A->i[i+1] - A->i[i];
912 	PLogFlops(4*n+3)
913         idx  = A->j + A->i[i] + shift;
914         v    = A->a + A->i[i] + shift;
915         sum  = b[i];
916         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917         d    = fshift + A->a[diag[i]+shift];
918         n    = B->i[i+1] - B->i[i];
919         idx  = B->j + B->i[i] + shift;
920         v    = B->a + B->i[i] + shift;
921         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
923       }
924     }
925   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
926     if (flag & SOR_ZERO_INITIAL_GUESS) {
927       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
928       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
930       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
931       PetscFunctionReturn(0);
932     }
933     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
934     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
935     while (its--) {
936       for ( i=0; i<m; i++ ) {
937         n    = A->i[i+1] - A->i[i];
938 	PLogFlops(4*n+3);
939         idx  = A->j + A->i[i] + shift;
940         v    = A->a + A->i[i] + shift;
941         sum  = b[i];
942         SPARSEDENSEMDOT(sum,xs,v,idx,n);
943         d    = fshift + A->a[diag[i]+shift];
944         n    = B->i[i+1] - B->i[i];
945         idx  = B->j + B->i[i] + shift;
946         v    = B->a + B->i[i] + shift;
947         SPARSEDENSEMDOT(sum,ls,v,idx,n);
948         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
949       }
950     }
951   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
952     if (flag & SOR_ZERO_INITIAL_GUESS) {
953       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
954       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
955       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
956       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
957       PetscFunctionReturn(0);
958     }
959     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
960     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
961     while (its--) {
962       for ( i=m-1; i>-1; i-- ) {
963         n    = A->i[i+1] - A->i[i];
964 	PLogFlops(4*n+3);
965         idx  = A->j + A->i[i] + shift;
966         v    = A->a + A->i[i] + shift;
967         sum  = b[i];
968         SPARSEDENSEMDOT(sum,xs,v,idx,n);
969         d    = fshift + A->a[diag[i]+shift];
970         n    = B->i[i+1] - B->i[i];
971         idx  = B->j + B->i[i] + shift;
972         v    = B->a + B->i[i] + shift;
973         SPARSEDENSEMDOT(sum,ls,v,idx,n);
974         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
975       }
976     }
977   } else {
978     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
979   }
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
982   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNC__
987 #define __FUNC__ "MatGetInfo_MPIAIJ"
988 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
989 {
990   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
991   Mat        A = mat->A, B = mat->B;
992   int        ierr;
993   double     isend[5], irecv[5];
994 
995   PetscFunctionBegin;
996   info->block_size     = 1.0;
997   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
998   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
999   isend[3] = info->memory;  isend[4] = info->mallocs;
1000   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1001   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1002   isend[3] += info->memory;  isend[4] += info->mallocs;
1003   if (flag == MAT_LOCAL) {
1004     info->nz_used      = isend[0];
1005     info->nz_allocated = isend[1];
1006     info->nz_unneeded  = isend[2];
1007     info->memory       = isend[3];
1008     info->mallocs      = isend[4];
1009   } else if (flag == MAT_GLOBAL_MAX) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   } else if (flag == MAT_GLOBAL_SUM) {
1017     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1018     info->nz_used      = irecv[0];
1019     info->nz_allocated = irecv[1];
1020     info->nz_unneeded  = irecv[2];
1021     info->memory       = irecv[3];
1022     info->mallocs      = irecv[4];
1023   }
1024   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1025   info->fill_ratio_needed = 0;
1026   info->factor_mallocs    = 0;
1027   info->rows_global       = (double)mat->M;
1028   info->columns_global    = (double)mat->N;
1029   info->rows_local        = (double)mat->m;
1030   info->columns_local     = (double)mat->N;
1031 
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 #undef __FUNC__
1036 #define __FUNC__ "MatSetOption_MPIAIJ"
1037 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1040   int        ierr;
1041 
1042   PetscFunctionBegin;
1043   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1044       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1045       op == MAT_COLUMNS_UNSORTED ||
1046       op == MAT_COLUMNS_SORTED ||
1047       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1048       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1049         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1050         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1051   } else if (op == MAT_ROW_ORIENTED) {
1052     a->roworiented = 1;
1053     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1054     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1055   } else if (op == MAT_ROWS_SORTED ||
1056              op == MAT_ROWS_UNSORTED ||
1057              op == MAT_SYMMETRIC ||
1058              op == MAT_STRUCTURALLY_SYMMETRIC ||
1059              op == MAT_YES_NEW_DIAGONALS)
1060     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1061   else if (op == MAT_COLUMN_ORIENTED) {
1062     a->roworiented = 0;
1063     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1064     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1065   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1066     a->donotstash = 1;
1067   } else if (op == MAT_NO_NEW_DIAGONALS){
1068     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1069   } else {
1070     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1071   }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNC__
1076 #define __FUNC__ "MatGetSize_MPIAIJ"
1077 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1078 {
1079   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1080 
1081   PetscFunctionBegin;
1082   if (m) *m = mat->M;
1083   if (n) *n = mat->N;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNC__
1088 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1089 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1090 {
1091   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1092 
1093   PetscFunctionBegin;
1094   if (m) *m = mat->m;
1095   if (n) *n = mat->n;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1101 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104 
1105   PetscFunctionBegin;
1106   *m = mat->rstart; *n = mat->rend;
1107   PetscFunctionReturn(0);
1108 }
1109 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
   merged from the diagonal (A) and off-diagonal (B) blocks into a single
   list ordered by increasing global column number.

   Input Parameters:
     matin - the matrix
     row   - global row number; must satisfy rstart <= row < rend
   Output Parameters:
     nz  - number of nonzeros in the row
     idx - global column indices (may be PETSC_NULL if not wanted)
     v   - values (may be PETSC_NULL if not wanted)

   Only one row may be "active" at a time; MatRestoreRow_MPIAIJ() must be
   called before the next MatGetRow().  The returned arrays point into
   buffers owned by the matrix (mat->rowvalues / mat->rowindices).
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  /* lazily allocate one merged buffer sized for the longest local row;
     kept on the matrix and reused by subsequent MatGetRow() calls */
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalars first, then the int index area after them */
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates B's compressed column numbers to global columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before cstart,
         i.e. the part of B that precedes all of A in column order */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested; find the split point now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's columns are local to the diagonal block: offset by cstart */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the per-block row workspace immediately; the merged copy survives */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1189 
1190 #undef __FUNC__
1191 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1192 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1193 {
1194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1195 
1196   PetscFunctionBegin;
1197   if (aij->getrowactive == PETSC_FALSE) {
1198     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1199   }
1200   aij->getrowactive = PETSC_FALSE;
1201   PetscFunctionReturn(0);
1202 }
1203 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a norm of a parallel AIJ matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); any other type errors.  On a single process the work is
   delegated to MatNorm() on the diagonal block.

   Note: the NORM_1 path allocates two dense arrays of length N (the global
   column count), which is not scalable in the number of columns.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-process case: the diagonal block is the whole matrix */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both local blocks, reduced, then sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column in tmp, then reduce and take max */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      /* A's columns are local: offset by cstart (plus index shift) */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      /* B's columns are compressed: map through garray to global numbers */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each row lives entirely on one process: local max, then MPI_MAX */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1279 
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix by
   inserting each local row (with global column numbering) as a column of a
   freshly created MPIAIJ matrix B.

   If matout is non-null, B is returned in *matout.  Otherwise (the
   "in-place" variant, allowed only for globally square matrices) the data
   structures of A are destroyed and B's header is copied over A, keeping
   only A's original bops/ops tables.

   Note: the column indices of the local diagonal block are temporarily
   shifted to global numbering and shifted back afterwards, so A's internal
   arrays are briefly modified even in the out-of-place case.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* B has the transposed layout: local sizes and global sizes swapped */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily convert the diagonal block's column indices to global numbering */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row 'row' of A as column 'row' of B: note swapped row/col args */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* undo the temporary global-numbering shift */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* translate the off-diagonal block's compressed columns through garray */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
#endif
    if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from C.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
1357 
1358 #undef __FUNC__
1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1361 {
1362   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1363   Mat        a = aij->A, b = aij->B;
1364   int        ierr,s1,s2,s3;
1365 
1366   PetscFunctionBegin;
1367   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1368   if (rr) {
1369     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1370     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1371     /* Overlap communication with computation. */
1372     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1373   }
1374   if (ll) {
1375     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1376     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1377     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1378   }
1379   /* scale  the diagonal block */
1380   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1381 
1382   if (rr) {
1383     /* Do a scatter end and then right scale the off-diagonal block */
1384     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1385     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1386   }
1387 
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1394 int MatPrintHelp_MPIAIJ(Mat A)
1395 {
1396   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1397   int        ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!a->rank) {
1401     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNC__
1407 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1408 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1409 {
1410   PetscFunctionBegin;
1411   *bs = 1;
1412   PetscFunctionReturn(0);
1413 }
1414 #undef __FUNC__
1415 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1416 int MatSetUnfactored_MPIAIJ(Mat A)
1417 {
1418   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1419   int        ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNC__
1427 #define __FUNC__ "MatEqual_MPIAIJ"
1428 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1429 {
1430   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1431   Mat        a, b, c, d;
1432   PetscTruth flg;
1433   int        ierr;
1434 
1435   PetscFunctionBegin;
1436   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1437   a = matA->A; b = matA->B;
1438   c = matB->A; d = matB->B;
1439 
1440   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1441   if (flg == PETSC_TRUE) {
1442     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1443   }
1444   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNC__
1449 #define __FUNC__ "MatCopy_MPIAIJ"
1450 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1451 {
1452   int        ierr;
1453   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1454   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1455 
1456   PetscFunctionBegin;
1457   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1458     /* because of the column compression in the off-processor part of the matrix a->B,
1459        the number of columns in a->B and b->B may be different, hence we cannot call
1460        the MatCopy() directly on the two parts. If need be, we can provide a more
1461        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1462        then copying the submatrices */
1463     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1464   } else {
1465     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1466     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1472 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1473 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1474 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1475 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1476 
1477 /* -------------------------------------------------------------------*/
/*
   Dispatch table of matrix operations for the MPIAIJ format.  Entries are
   matched by position to the slots of struct _MatOps (declared elsewhere);
   a 0 entry means the corresponding operation is not implemented for this
   format.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};
1544 
1545 /* ----------------------------------------------------------------------------------------*/
1546 
1547 EXTERN_C_BEGIN
1548 #undef __FUNC__
1549 #define __FUNC__ "MatStoreValues_MPIAIJ"
1550 int MatStoreValues_MPIAIJ(Mat mat)
1551 {
1552   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1553   int        ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1557   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNC__
1564 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1565 int MatRetrieveValues_MPIAIJ(Mat mat)
1566 {
1567   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1568   int        ierr;
1569 
1570   PetscFunctionBegin;
1571   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1572   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 #include "pc.h"
1578 EXTERN_C_BEGIN
1579 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1580 EXTERN_C_END
1581 
1582 #undef __FUNC__
1583 #define __FUNC__ "MatCreateMPIAIJ"
1584 /*@C
1585    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1586    (the default parallel PETSc format).  For good matrix assembly performance
1587    the user should preallocate the matrix storage by setting the parameters
1588    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1589    performance can be increased by more than a factor of 50.
1590 
1591    Collective on MPI_Comm
1592 
1593    Input Parameters:
1594 +  comm - MPI communicator
1595 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1596            This value should be the same as the local size used in creating the
1597            y vector for the matrix-vector product y = Ax.
1598 .  n - This value should be the same as the local size used in creating the
1599        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1600        calculated if N is given) For square matrices n is almost always m.
1601 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1602 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1603 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1604            (same value is used for all local rows)
1605 .  d_nnz - array containing the number of nonzeros in the various rows of the
1606            DIAGONAL portion of the local submatrix (possibly different for each row)
1607            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1608            The size of this array is equal to the number of local rows, i.e 'm'.
1609            You must leave room for the diagonal entry even if it is zero.
1610 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1611            submatrix (same value is used for all local rows).
1612 -  o_nnz - array containing the number of nonzeros in the various rows of the
1613            OFF-DIAGONAL portion of the local submatrix (possibly different for
1614            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1615            structure. The size of this array is equal to the number
1616            of local rows, i.e 'm'.
1617 
1618    Output Parameter:
1619 .  A - the matrix
1620 
1621    Notes:
1622    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1623    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1624    storage requirements for this matrix.
1625 
1626    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1627    processor then it must be used on all processors that share the object for
1628    that argument.
1629 
1630    The AIJ format (also called the Yale sparse matrix format or
1631    compressed row storage), is fully compatible with standard Fortran 77
1632    storage.  That is, the stored row and column indices can begin at
1633    either one (as in Fortran) or zero.  See the users manual for details.
1634 
1635    The user MUST specify either the local or global matrix dimensions
1636    (possibly both).
1637 
1638    The parallel matrix is partitioned such that the first m0 rows belong to
1639    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1640    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1641 
1642    The DIAGONAL portion of the local submatrix of a processor can be defined
1643    as the submatrix which is obtained by extracting the part corresponding
1644    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1645    first row that belongs to the processor, and r2 is the last row belonging
1646    to this processor. This is a square mxm matrix. The remaining portion
1647    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1648 
1649    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1650 
1651    By default, this format uses inodes (identical nodes) when possible.
1652    We search for consecutive rows with the same nonzero structure, thereby
1653    reusing matrix information to achieve increased efficiency.
1654 
1655    Options Database Keys:
1656 +  -mat_aij_no_inode  - Do not use inodes
1657 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1658 .  -mat_aij_oneindex - Internally use indexing starting at 1
1659         rather than 0.  Note that when calling MatSetValues(),
1660         the user still MUST index entries starting at 0!
1661 -   -mat_mpi - use the parallel matrix data structures even on one processor
1662                (defaults to using SeqAIJ format on one processor)
1665 
1666 
1667    Example usage:
1668 
1669    Consider the following 8x8 matrix with 34 non-zero values, that is
1670    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1671    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1672    as follows:
1673 
1674 .vb
1675             1  2  0  |  0  3  0  |  0  4
1676     Proc0   0  5  6  |  7  0  0  |  8  0
1677             9  0 10  | 11  0  0  | 12  0
1678     -------------------------------------
1679            13  0 14  | 15 16 17  |  0  0
1680     Proc1   0 18  0  | 19 20 21  |  0  0
1681             0  0  0  | 22 23  0  | 24  0
1682     -------------------------------------
1683     Proc2  25 26 27  |  0  0 28  | 29  0
1684            30  0  0  | 31 32 33  |  0 34
1685 .ve
1686 
1687    This can be represented as a collection of submatrices as:
1688 
1689 .vb
1690       A B C
1691       D E F
1692       G H I
1693 .ve
1694 
1695    Where the submatrices A,B,C are owned by proc0, D,E,F are
1696    owned by proc1, G,H,I are owned by proc2.
1697 
1698    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1699    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1700    The 'M','N' parameters are 8,8, and have the same values on all procs.
1701 
1702    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1703    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1704    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1705    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1706    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1707    matrix, and [DF] as another SeqAIJ matrix.
1708 
1709    When d_nz, o_nz parameters are specified, d_nz storage elements are
1710    allocated for every row of the local diagonal submatrix, and o_nz
1711    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1712    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
1713    per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1714    In this case, the values of d_nz,o_nz are:
1715 .vb
1716      proc0 : d_nz = 2, o_nz = 2
1717      proc1 : d_nz = 3, o_nz = 2
1718      proc2 : d_nz = 1, o_nz = 4
1719 .ve
1720    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1721    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1722    for proc2. i.e we are using 12+15+10=37 storage locations to store
1723    34 values.
1724 
1725    When d_nnz, o_nnz parameters are specified, the storage is specified
1726    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1727    In the above case the values for d_nnz,o_nnz are:
1728 .vb
1729      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1730      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1731      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1732 .ve
1733    Here the space allocated is sum of all the above values i.e 34, and
1734    hence pre-allocation is perfect.
1735 
1736    Level: intermediate
1737 
1738 .keywords: matrix, aij, compressed row, sparse, parallel
1739 
1740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1741 @*/
1742 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1743 {
1744   Mat          B;
1745   Mat_MPIAIJ   *b;
1746   int          ierr, i,size,flag1 = 0, flag2 = 0;
1747 
1748   PetscFunctionBegin;
1749 
1750   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1751   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1752   if (d_nnz) {
1753     for (i=0; i<m; i++) {
1754       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1755     }
1756   }
1757   if (o_nnz) {
1758     for (i=0; i<m; i++) {
1759       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1760     }
1761   }
1762 
1763   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1764   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1765   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1766   if (!flag1 && !flag2 && size == 1) {
1767     if (M == PETSC_DECIDE) M = m;
1768     if (N == PETSC_DECIDE) N = n;
1769     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1770     PetscFunctionReturn(0);
1771   }
1772 
1773   *A = 0;
1774   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1775   PLogObjectCreate(B);
1776   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1777   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1778   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1779   B->ops->destroy = MatDestroy_MPIAIJ;
1780   B->ops->view    = MatView_MPIAIJ;
1781   B->factor       = 0;
1782   B->assembled    = PETSC_FALSE;
1783   B->mapping      = 0;
1784 
1785   B->insertmode      = NOT_SET_VALUES;
1786   b->size            = size;
1787   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1788 
1789   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1790     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1791   }
1792 
1793   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1794   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1795   b->m = m; B->m = m;
1796   b->n = n; B->n = n;
1797   b->N = N; B->N = N;
1798   b->M = M; B->M = M;
1799 
1800   /* the information in the maps duplicates the information computed below, eventually
1801      we should remove the duplicate information that is not contained in the maps */
1802   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1803   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1804 
1805   /* build local table of row and column ownerships */
1806   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1807   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1808   b->cowners = b->rowners + b->size + 2;
1809   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1810   b->rowners[0] = 0;
1811   for ( i=2; i<=b->size; i++ ) {
1812     b->rowners[i] += b->rowners[i-1];
1813   }
1814   b->rstart = b->rowners[b->rank];
1815   b->rend   = b->rowners[b->rank+1];
1816   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1817   b->cowners[0] = 0;
1818   for ( i=2; i<=b->size; i++ ) {
1819     b->cowners[i] += b->cowners[i-1];
1820   }
1821   b->cstart = b->cowners[b->rank];
1822   b->cend   = b->cowners[b->rank+1];
1823 
1824   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1825   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1826   PLogObjectParent(B,b->A);
1827   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1828   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1829   PLogObjectParent(B,b->B);
1830 
1831   /* build cache for off array entries formed */
1832   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1833   b->donotstash  = 0;
1834   b->colmap      = 0;
1835   b->garray      = 0;
1836   b->roworiented = 1;
1837 
1838   /* stuff used for matrix vector multiply */
1839   b->lvec      = 0;
1840   b->Mvctx     = 0;
1841 
1842   /* stuff for MatGetRow() */
1843   b->rowindices   = 0;
1844   b->rowvalues    = 0;
1845   b->getrowactive = PETSC_FALSE;
1846 
1847   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1848                                      "MatStoreValues_MPIAIJ",
1849                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1850   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1851                                      "MatRetrieveValues_MPIAIJ",
1852                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1854                                      "MatGetDiagonalBlock_MPIAIJ",
1855                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1856   *A = B;
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNC__
1861 #define __FUNC__ "MatDuplicate_MPIAIJ"
1862 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1863 {
1864   Mat        mat;
1865   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1866   int        ierr, len=0, flg;
1867 
1868   PetscFunctionBegin;
1869   *newmat       = 0;
1870   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1871   PLogObjectCreate(mat);
1872   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1873   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1874   mat->ops->destroy = MatDestroy_MPIAIJ;
1875   mat->ops->view    = MatView_MPIAIJ;
1876   mat->factor       = matin->factor;
1877   mat->assembled    = PETSC_TRUE;
1878   mat->insertmode   = NOT_SET_VALUES;
1879 
1880   a->m = mat->m   = oldmat->m;
1881   a->n = mat->n   = oldmat->n;
1882   a->M = mat->M   = oldmat->M;
1883   a->N = mat->N   = oldmat->N;
1884 
1885   a->rstart       = oldmat->rstart;
1886   a->rend         = oldmat->rend;
1887   a->cstart       = oldmat->cstart;
1888   a->cend         = oldmat->cend;
1889   a->size         = oldmat->size;
1890   a->rank         = oldmat->rank;
1891   a->donotstash   = oldmat->donotstash;
1892   a->roworiented  = oldmat->roworiented;
1893   a->rowindices   = 0;
1894   a->rowvalues    = 0;
1895   a->getrowactive = PETSC_FALSE;
1896 
1897   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1898   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1899   a->cowners = a->rowners + a->size + 2;
1900   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1901   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1902   if (oldmat->colmap) {
1903 #if defined (PETSC_USE_CTABLE)
1904     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1905 #else
1906     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1907     PLogObjectMemory(mat,(a->N)*sizeof(int));
1908     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1909 #endif
1910   } else a->colmap = 0;
1911   if (oldmat->garray) {
1912     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1913     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1914     PLogObjectMemory(mat,len*sizeof(int));
1915     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1916   } else a->garray = 0;
1917 
1918   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1919   PLogObjectParent(mat,a->lvec);
1920   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1921   PLogObjectParent(mat,a->Mvctx);
1922   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1923   PLogObjectParent(mat,a->A);
1924   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1925   PLogObjectParent(mat,a->B);
1926   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1927   if (flg) {
1928     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1929   }
1930   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1931   *newmat = mat;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #include "sys.h"
1936 
1937 #undef __FUNC__
1938 #define __FUNC__ "MatLoad_MPIAIJ"
1939 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1940 {
1941   Mat          A;
1942   Scalar       *vals,*svals;
1943   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1944   MPI_Status   status;
1945   int          i, nz, ierr, j,rstart, rend, fd;
1946   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1947   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1948   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1949 
1950   PetscFunctionBegin;
1951   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1952   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1953   if (!rank) {
1954     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1955     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1956     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1957     if (header[3] < 0) {
1958       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1959     }
1960   }
1961 
1962   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1963   M = header[1]; N = header[2];
1964   /* determine ownership of all rows */
1965   m = M/size + ((M % size) > rank);
1966   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1967   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1968   rowners[0] = 0;
1969   for ( i=2; i<=size; i++ ) {
1970     rowners[i] += rowners[i-1];
1971   }
1972   rstart = rowners[rank];
1973   rend   = rowners[rank+1];
1974 
1975   /* distribute row lengths to all processors */
1976   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1977   offlens = ourlens + (rend-rstart);
1978   if (!rank) {
1979     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1980     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1981     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1982     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1983     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1984     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1985   } else {
1986     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1987   }
1988 
1989   if (!rank) {
1990     /* calculate the number of nonzeros on each processor */
1991     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1992     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1993     for ( i=0; i<size; i++ ) {
1994       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1995         procsnz[i] += rowlengths[j];
1996       }
1997     }
1998     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1999 
2000     /* determine max buffer needed and allocate it */
2001     maxnz = 0;
2002     for ( i=0; i<size; i++ ) {
2003       maxnz = PetscMax(maxnz,procsnz[i]);
2004     }
2005     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2006 
2007     /* read in my part of the matrix column indices  */
2008     nz     = procsnz[0];
2009     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2010     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2011 
2012     /* read in every one elses and ship off */
2013     for ( i=1; i<size; i++ ) {
2014       nz   = procsnz[i];
2015       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2016       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2017     }
2018     ierr = PetscFree(cols);CHKERRQ(ierr);
2019   } else {
2020     /* determine buffer space needed for message */
2021     nz = 0;
2022     for ( i=0; i<m; i++ ) {
2023       nz += ourlens[i];
2024     }
2025     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2026 
2027     /* receive message of column indices*/
2028     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2029     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2030     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2031   }
2032 
2033   /* determine column ownership if matrix is not square */
2034   if (N != M) {
2035     n      = N/size + ((N % size) > rank);
2036     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2037     cstart = cend - n;
2038   } else {
2039     cstart = rstart;
2040     cend   = rend;
2041     n      = cend - cstart;
2042   }
2043 
2044   /* loop over local rows, determining number of off diagonal entries */
2045   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2046   jj = 0;
2047   for ( i=0; i<m; i++ ) {
2048     for ( j=0; j<ourlens[i]; j++ ) {
2049       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2050       jj++;
2051     }
2052   }
2053 
2054   /* create our matrix */
2055   for ( i=0; i<m; i++ ) {
2056     ourlens[i] -= offlens[i];
2057   }
2058   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2059   A = *newmat;
2060   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2061   for ( i=0; i<m; i++ ) {
2062     ourlens[i] += offlens[i];
2063   }
2064 
2065   if (!rank) {
2066     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2067 
2068     /* read in my part of the matrix numerical values  */
2069     nz   = procsnz[0];
2070     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2071 
2072     /* insert into matrix */
2073     jj      = rstart;
2074     smycols = mycols;
2075     svals   = vals;
2076     for ( i=0; i<m; i++ ) {
2077       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2078       smycols += ourlens[i];
2079       svals   += ourlens[i];
2080       jj++;
2081     }
2082 
2083     /* read in other processors and ship out */
2084     for ( i=1; i<size; i++ ) {
2085       nz   = procsnz[i];
2086       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2087       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2088     }
2089     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2090   } else {
2091     /* receive numeric values */
2092     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2093 
2094     /* receive message of values*/
2095     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2096     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2097     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2098 
2099     /* insert into matrix */
2100     jj      = rstart;
2101     smycols = mycols;
2102     svals   = vals;
2103     for ( i=0; i<m; i++ ) {
2104       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2105       smycols += ourlens[i];
2106       svals   += ourlens[i];
2107       jj++;
2108     }
2109   }
2110   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2111   ierr = PetscFree(vals);CHKERRQ(ierr);
2112   ierr = PetscFree(mycols);CHKERRQ(ierr);
2113   ierr = PetscFree(rowners);CHKERRQ(ierr);
2114 
2115   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2116   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNC__
2121 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2122 /*
2123     Not great since it makes two copies of the submatrix, first an SeqAIJ
2124   in local and then by concatenating the local matrices the end result.
2125   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2126 */
2127 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2128 {
2129   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2130   Mat        *local,M, Mreuse;
2131   Scalar     *vwork,*aa;
2132   MPI_Comm   comm = mat->comm;
2133   Mat_SeqAIJ *aij;
2134   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2135 
2136   PetscFunctionBegin;
2137   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2139 
2140   if (call ==  MAT_REUSE_MATRIX) {
2141     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2142     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2143     local = &Mreuse;
2144     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2145   } else {
2146     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2147     Mreuse = *local;
2148     ierr = PetscFree(local);CHKERRQ(ierr);
2149   }
2150 
2151   /*
2152       m - number of local rows
2153       n - number of columns (same on all processors)
2154       rstart - first row in new global matrix generated
2155   */
2156   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2157   if (call == MAT_INITIAL_MATRIX) {
2158     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2159     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2160     ii  = aij->i;
2161     jj  = aij->j;
2162 
2163     /*
2164         Determine the number of non-zeros in the diagonal and off-diagonal
2165         portions of the matrix in order to do correct preallocation
2166     */
2167 
2168     /* first get start and end of "diagonal" columns */
2169     if (csize == PETSC_DECIDE) {
2170       nlocal = n/size + ((n % size) > rank);
2171     } else {
2172       nlocal = csize;
2173     }
2174     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2175     rstart = rend - nlocal;
2176     if (rank == size - 1 && rend != n) {
2177       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2178     }
2179 
2180     /* next, compute all the lengths */
2181     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2182     olens = dlens + m;
2183     for ( i=0; i<m; i++ ) {
2184       jend = ii[i+1] - ii[i];
2185       olen = 0;
2186       dlen = 0;
2187       for ( j=0; j<jend; j++ ) {
2188         if ( *jj < rstart || *jj >= rend) olen++;
2189         else dlen++;
2190         jj++;
2191       }
2192       olens[i] = olen;
2193       dlens[i] = dlen;
2194     }
2195     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2196     ierr = PetscFree(dlens);CHKERRQ(ierr);
2197   } else {
2198     int ml,nl;
2199 
2200     M = *newmat;
2201     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2202     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2203     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2204     /*
2205          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2206        rather than the slower MatSetValues().
2207     */
2208     M->was_assembled = PETSC_TRUE;
2209     M->assembled     = PETSC_FALSE;
2210   }
2211   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2212   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2213   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2214   ii  = aij->i;
2215   jj  = aij->j;
2216   aa  = aij->a;
2217   for (i=0; i<m; i++) {
2218     row   = rstart + i;
2219     nz    = ii[i+1] - ii[i];
2220     cwork = jj;     jj += nz;
2221     vwork = aa;     aa += nz;
2222     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2223   }
2224 
2225   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2226   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2227   *newmat = M;
2228 
2229   /* save submatrix used in processor for next request */
2230   if (call ==  MAT_INITIAL_MATRIX) {
2231     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2232     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2233   }
2234 
2235   PetscFunctionReturn(0);
2236 }
2237