#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.291 1999/04/19 20:30:26 bsmith Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
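
/*
   Illustrative sketch (kept under "#if 0", never compiled): how the colmap
   built above is consulted elsewhere in this file to translate a global
   column number into a local column index of the off-diagonal part.  The
   +1/-1 offsets let a zero entry (or a failed table lookup) mean "this
   column has no nonzeros on this process".  The variable gcol is made up
   for the example.
*/
#if 0
{
  int gcol = 42;  /* some global column number */
  int lcol;       /* local column in the off-diagonal block, or -1 */
#if defined (USE_CTABLE)
  ierr = TableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;         /* stored as local index + 1; 0 means absent */
#else
  lcol = aij->colmap[gcol] - 1;
#endif
  if (lcol < 0) { /* gcol is not present in the off-diagonal part */ }
}
#endif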

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required in the macros above */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
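
/*
   Illustrative sketch (under "#if 0", never compiled): the caller-side
   pattern served by MatSetValues_MPIAIJ().  Values are addressed by GLOBAL
   indices; entries whose rows live on another process are stashed above and
   routed during MatAssemblyBegin/End().  The matrix size, preallocation
   figures, and indices below are made up for the example.
*/
#if 0
{
  Mat    C;
  int    ierr,row = 0,cols[2] = {0,1};
  Scalar vals[2] = {2.0,-1.0};

  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                         3,PETSC_NULL,2,PETSC_NULL,&C);CHKERRQ(ierr);
  ierr = MatSetValues(C,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
}
#endif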

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted values, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and
     slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
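
/*
   Illustrative sketch (under "#if 0", never compiled): zeroing a set of
   global rows of an assembled MPIAIJ matrix A, placing 1.0 on the diagonal
   of each zeroed row.  Any process may list any global rows; the
   communication above delivers each row to its owner.  The row numbers are
   made up for the example; pass 0 instead of &one to leave the diagonal
   entries zeroed as well.
*/
#if 0
{
  IS     rows;
  int    ierr,myrows[2] = {0,5};
  Scalar one = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,myrows,&rows);CHKERRQ(ierr);
  ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);
  ierr = ISDestroy(rows);CHKERRQ(ierr);
}
#endif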

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
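
/*
   Illustrative sketch (under "#if 0", never compiled): y = A*x with
   conforming parallel vectors.  MatMult_MPIAIJ() above overlaps the scatter
   of the off-process entries of x with the multiply by the diagonal block.
   The vector size is made up for the example.
*/
#if 0
{
  Vec x,y;
  int ierr;

  ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,100,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
}
#endif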

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int         ierr;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}
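
/*
   Illustrative sketch (under "#if 0", never compiled): driving the viewer
   logic above from user code; this prints the INFO-format summary rather
   than the matrix entries.  Treat VIEWER_STDOUT_WORLD and ViewerSetFormat()
   as the era's standard viewer names, not identifiers defined in this file.
*/
#if 0
{
  int ierr;
  ierr = ViewerSetFormat(VIEWER_STDOUT_WORLD,VIEWER_FORMAT_ASCII_INFO,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatView(A,VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
}
#endif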

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
  ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
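
/*
   Illustrative sketch (under "#if 0", never compiled): one local symmetric
   SOR sweep through the routine above, e.g. as a smoother; omega = 1.0
   gives Gauss-Seidel.  The flag combination mirrors the branches handled
   above; b and x are assumed to be conforming parallel vectors.
*/
#if 0
{
  int ierr;
  ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                  0.0,1,x);CHKERRQ(ierr);
}
#endif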

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}
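
/*
   Illustrative sketch (under "#if 0", never compiled): querying global
   nonzero counts through the routine above.  MAT_GLOBAL_SUM reduces the
   per-process numbers with MPI_SUM and MAT_GLOBAL_MAX with MPI_MAX,
   exactly as implemented here.
*/
#if 0
{
  MatInfo info;
  int     ierr;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  printf("nz used %d, nz allocated %d\n",(int)info.nz_used,(int)info.nz_allocated);
}
#endif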

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
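
/*
   Illustrative sketch (under "#if 0", never compiled): visiting the locally
   owned rows with the routine above.  Only local rows may be requested, and
   each MatGetRow() must be matched by MatRestoreRow() before the next row
   is fetched, since all calls share the single rowvalues/rowindices buffer.
*/
#if 0
{
  int    ierr,row,rstart,rend,nz,*cols;
  Scalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for ( row=rstart; row<rend; row++ ) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... examine the nz entries cols[0..nz-1], vals[0..nz-1] ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
}
#endif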

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops   = A->bops;
    Aops    = A->ops;
    ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1349 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1350 {
1351   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1352   Mat a = aij->A, b = aij->B;
1353   int ierr,s1,s2,s3;
1354 
1355   PetscFunctionBegin;
1356   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1357   if (rr) {
1358     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1359     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1360     /* Overlap communication with computation. */
1361     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1362   }
1363   if (ll) {
1364     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1365     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1366     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1367   }
1368   /* scale  the diagonal block */
1369   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1370 
1371   if (rr) {
1372     /* Do a scatter end and then right scale the off-diagonal block */
1373     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1374     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1375   }
1376 
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1383 int MatPrintHelp_MPIAIJ(Mat A)
1384 {
1385   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1386   int        ierr;
1387 
1388   PetscFunctionBegin;
1389   if (!a->rank) {
1390     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1397 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1398 {
1399   PetscFunctionBegin;
1400   *bs = 1;
1401   PetscFunctionReturn(0);
1402 }
1403 #undef __FUNC__
1404 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1405 int MatSetUnfactored_MPIAIJ(Mat A)
1406 {
1407   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1408   int        ierr;
1409 
1410   PetscFunctionBegin;
1411   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatEqual_MPIAIJ"
1417 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1418 {
1419   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1420   Mat        a, b, c, d;
1421   PetscTruth flg;
1422   int        ierr;
1423 
1424   PetscFunctionBegin;
1425   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1426   a = matA->A; b = matA->B;
1427   c = matB->A; d = matB->B;
1428 
1429   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1430   if (flg == PETSC_TRUE) {
1431     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1432   }
1433   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNC__
1438 #define __FUNC__ "MatCopy_MPIAIJ"
1439 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1440 {
1441   int        ierr;
1442   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1443   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1444 
1445   PetscFunctionBegin;
1446   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1447     /* Because of the column compression in the off-processor part of the matrix a->B,
1448        the number of columns in a->B and b->B may differ, so we cannot call
1449        MatCopy() directly on the two parts. If need be, a copy more efficient than
1450        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1451        and then copying the submatrices */
1452     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1453   } else {
1454     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1455     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1461 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1462 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1463 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1464 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1465 
1466 /* -------------------------------------------------------------------*/
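/*
   Function table for the MPIAIJ matrix type; a 0 entry means that the
   corresponding matrix operation is not implemented for this type.
*/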
1467 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1468        MatGetRow_MPIAIJ,
1469        MatRestoreRow_MPIAIJ,
1470        MatMult_MPIAIJ,
1471        MatMultAdd_MPIAIJ,
1472        MatMultTrans_MPIAIJ,
1473        MatMultTransAdd_MPIAIJ,
1474        0,
1475        0,
1476        0,
1477        0,
1478        0,
1479        0,
1480        MatRelax_MPIAIJ,
1481        MatTranspose_MPIAIJ,
1482        MatGetInfo_MPIAIJ,
1483        MatEqual_MPIAIJ,
1484        MatGetDiagonal_MPIAIJ,
1485        MatDiagonalScale_MPIAIJ,
1486        MatNorm_MPIAIJ,
1487        MatAssemblyBegin_MPIAIJ,
1488        MatAssemblyEnd_MPIAIJ,
1489        0,
1490        MatSetOption_MPIAIJ,
1491        MatZeroEntries_MPIAIJ,
1492        MatZeroRows_MPIAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497        MatGetSize_MPIAIJ,
1498        MatGetLocalSize_MPIAIJ,
1499        MatGetOwnershipRange_MPIAIJ,
1500        0,
1501        0,
1502        0,
1503        0,
1504        MatDuplicate_MPIAIJ,
1505        0,
1506        0,
1507        0,
1508        0,
1509        0,
1510        MatGetSubMatrices_MPIAIJ,
1511        MatIncreaseOverlap_MPIAIJ,
1512        MatGetValues_MPIAIJ,
1513        MatCopy_MPIAIJ,
1514        MatPrintHelp_MPIAIJ,
1515        MatScale_MPIAIJ,
1516        0,
1517        0,
1518        0,
1519        MatGetBlockSize_MPIAIJ,
1520        0,
1521        0,
1522        0,
1523        0,
1524        MatFDColoringCreate_MPIAIJ,
1525        0,
1526        MatSetUnfactored_MPIAIJ,
1527        0,
1528        0,
1529        MatGetSubMatrix_MPIAIJ,
1530        0,
1531        0,
1532        MatGetMaps_Petsc};
1533 
1534 /* ----------------------------------------------------------------------------------------*/
1535 
1536 EXTERN_C_BEGIN
1537 #undef __FUNC__
1538 #define __FUNC__ "MatStoreValues_MPIAIJ"
1539 int MatStoreValues_MPIAIJ(Mat mat)
1540 {
1541   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1542   int        ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1546   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 EXTERN_C_END
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNC__
1553 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1554 int MatRetrieveValues_MPIAIJ(Mat mat)
1555 {
1556   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1557   int        ierr;
1558 
1559   PetscFunctionBegin;
1560   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1561   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 EXTERN_C_END
1565 
1566 #include "pc.h"
1567 EXTERN_C_BEGIN
1568 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1569 EXTERN_C_END
1570 
1571 #undef __FUNC__
1572 #define __FUNC__ "MatCreateMPIAIJ"
1573 /*@C
1574    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1575    (the default parallel PETSc format).  For good matrix assembly performance
1576    the user should preallocate the matrix storage by setting the parameters
1577    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1578    performance can be increased by more than a factor of 50.
1579 
1580    Collective on MPI_Comm
1581 
1582    Input Parameters:
1583 +  comm - MPI communicator
1584 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1585            This value should be the same as the local size used in creating the
1586            y vector for the matrix-vector product y = Ax.
1587 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1588            This value should be the same as the local size used in creating the
1589            x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1590 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1591 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1592 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1593            (same value is used for all local rows)
1594 .  d_nnz - array containing the number of nonzeros in the various rows of the
1595            DIAGONAL portion of the local submatrix (possibly different for each row)
1596            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1597            The size of this array is equal to the number of local rows, i.e., 'm'.
1598            You must leave room for the diagonal entry even if it is zero.
1599 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1600            submatrix (same value is used for all local rows).
1601 -  o_nnz - array containing the number of nonzeros in the various rows of the
1602            OFF-DIAGONAL portion of the local submatrix (possibly different for
1603            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1604            structure. The size of this array is equal to the number
1605            of local rows, i.e., 'm'.
1606 
1607    Output Parameter:
1608 .  A - the matrix
1609 
1610    Notes:
1611    The m,n,M,N parameters specify the size of the matrix and its partitioning
1612    across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the
1613    approximate storage requirements for this matrix.
1614 
1615    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1616    processor, then it must be used on all processors that share the object for
1617    that argument.
1618 
1619    The AIJ format (also called the Yale sparse matrix format or
1620    compressed row storage), is fully compatible with standard Fortran 77
1621    storage.  That is, the stored row and column indices can begin at
1622    either one (as in Fortran) or zero.  See the users manual for details.
1623 
1624    The user MUST specify either the local or global matrix dimensions
1625    (possibly both).
1626 
1627    The parallel matrix is partitioned such that the first m0 rows belong to
1628    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1629    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
1630 
1631    The DIAGONAL portion of the local submatrix of a processor can be defined
1632    as the submatrix which is obtained by extracting the part corresponding
1633    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1634    first row that belongs to the processor, and r2 is the last row belonging
1635    to this processor. This is a square mxm matrix. The remaining portion of
1636    the local submatrix (of size mxN) constitutes the OFF-DIAGONAL portion.
1637 
1638    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1639 
1640    By default, this format uses inodes (identical nodes) when possible.
1641    We search for consecutive rows with the same nonzero structure, thereby
1642    reusing matrix information to achieve increased efficiency.
1643 
1644    Options Database Keys:
1645 +  -mat_aij_no_inode  - Do not use inodes
1646 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1647 .  -mat_aij_oneindex - Internally use indexing starting at 1
1648         rather than 0.  Note that when calling MatSetValues(),
1649         the user still MUST index entries starting at 0!
1650 -  -mat_mpi - use the parallel matrix data structures even on
1651                one processor (defaults to using the SeqAIJ format
1652                on one processor)
1654 
1655 
1656    Example usage:
1657 
1658    Consider the following 8x8 matrix with 34 nonzero values, which is
1659    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1660    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
1661    as follows:
1662 
1663 .vb
1664             1  2  0  |  0  3  0  |  0  4
1665     Proc0   0  5  6  |  7  0  0  |  8  0
1666             9  0 10  | 11  0  0  | 12  0
1667     -------------------------------------
1668            13  0 14  | 15 16 17  |  0  0
1669     Proc1   0 18  0  | 19 20 21  |  0  0
1670             0  0  0  | 22 23  0  | 24  0
1671     -------------------------------------
1672     Proc2  25 26 27  |  0  0 28  | 29  0
1673            30  0  0  | 31 32 33  |  0 34
1674 .ve
1675 
1676    This can be represented as a collection of submatrices as:
1677 
1678 .vb
1679       A B C
1680       D E F
1681       G H I
1682 .ve
1683 
1684    where the submatrices A,B,C are owned by proc0, D,E,F are
1685    owned by proc1, G,H,I are owned by proc2.
1686 
1687    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1688    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1689    The 'M','N' parameters are 8,8, and have the same values on all procs.
1690 
1691    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1692    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1693    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1694    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1695    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1696    matrix, and [DF] as another SeqAIJ matrix.
1697 
1698    When the d_nz, o_nz parameters are specified, d_nz storage elements are
1699    allocated for every row of the local DIAGONAL submatrix, and o_nz
1700    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1701    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
1702    over the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices.
1703    In this case, the values of d_nz,o_nz are:
1704 .vb
1705      proc0 : d_nz = 2, o_nz = 2
1706      proc1 : d_nz = 3, o_nz = 2
1707      proc2 : d_nz = 1, o_nz = 4
1708 .ve
1709    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1710    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
1711    for proc2, i.e., we are using 12+15+10=37 storage locations to store
1712    34 values.
1713 
1714    When the d_nnz, o_nnz parameters are specified, the storage is specified
1715    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
1716    In the above case the values for d_nnz,o_nnz are:
1717 .vb
1718      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1719      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1720      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1721 .ve
1722    Here the space allocated is the sum of all the above values, i.e., 34, and
1723    hence the preallocation is perfect.
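
   As a rough sketch (variable names here are illustrative only), the matrix
   for proc1 in the example above could then be created with:
.vb
     /* preallocation values taken from the example above */
     int d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* insert entries with MatSetValues(), then assemble */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve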
1724 
1725    Level: intermediate
1726 
1727 .keywords: matrix, aij, compressed row, sparse, parallel
1728 
1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1730 @*/
1731 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1732 {
1733   Mat          B;
1734   Mat_MPIAIJ   *b;
1735   int          ierr, i,size,flag1 = 0, flag2 = 0;
1736 
1737   PetscFunctionBegin;
1738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1739   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1740   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1741   if (!flag1 && !flag2 && size == 1) {
1742     if (M == PETSC_DECIDE) M = m;
1743     if (N == PETSC_DECIDE) N = n;
1744     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1745     PetscFunctionReturn(0);
1746   }
1747 
1748   *A = 0;
1749   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1750   PLogObjectCreate(B);
1751   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1752   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1753   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1754   B->ops->destroy = MatDestroy_MPIAIJ;
1755   B->ops->view    = MatView_MPIAIJ;
1756   B->factor       = 0;
1757   B->assembled    = PETSC_FALSE;
1758   B->mapping      = 0;
1759 
1760   B->insertmode      = NOT_SET_VALUES;
1761   b->size            = size;
1762   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1763 
1764   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1765     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1766   }
1767 
1768   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1769   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1770   b->m = m; B->m = m;
1771   b->n = n; B->n = n;
1772   b->N = N; B->N = N;
1773   b->M = M; B->M = M;
1774 
1775   /* the information in the maps duplicates the information computed below, eventually
1776      we should remove the duplicate information that is not contained in the maps */
1777   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1778   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1779 
1780   /* build local table of row and column ownerships */
1781   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1782   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1783   b->cowners = b->rowners + b->size + 2;
1784   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
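  /* turn the gathered local row counts into a prefix sum: afterwards rowners[i]
     is the first global row owned by process i, and rowners[size] equals M */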
1785   b->rowners[0] = 0;
1786   for ( i=2; i<=b->size; i++ ) {
1787     b->rowners[i] += b->rowners[i-1];
1788   }
1789   b->rstart = b->rowners[b->rank];
1790   b->rend   = b->rowners[b->rank+1];
1791   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1792   b->cowners[0] = 0;
1793   for ( i=2; i<=b->size; i++ ) {
1794     b->cowners[i] += b->cowners[i-1];
1795   }
1796   b->cstart = b->cowners[b->rank];
1797   b->cend   = b->cowners[b->rank+1];
1798 
1799   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1800   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1801   PLogObjectParent(B,b->A);
1802   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1803   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1804   PLogObjectParent(B,b->B);
1805 
1806   /* build cache for off-processor entries formed */
1807   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1808   b->donotstash  = 0;
1809   b->colmap      = 0;
1810   b->garray      = 0;
1811   b->roworiented = 1;
1812 
1813   /* stuff used for matrix vector multiply */
1814   b->lvec      = 0;
1815   b->Mvctx     = 0;
1816 
1817   /* stuff for MatGetRow() */
1818   b->rowindices   = 0;
1819   b->rowvalues    = 0;
1820   b->getrowactive = PETSC_FALSE;
1821 
1822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1823                                      "MatStoreValues_MPIAIJ",
1824                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1826                                      "MatRetrieveValues_MPIAIJ",
1827                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1829                                      "MatGetDiagonalBlock_MPIAIJ",
1830                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1831   *A = B;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNC__
1836 #define __FUNC__ "MatDuplicate_MPIAIJ"
1837 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1838 {
1839   Mat        mat;
1840   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1841   int        ierr, len=0, flg;
1842 
1843   PetscFunctionBegin;
1844   *newmat       = 0;
1845   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1846   PLogObjectCreate(mat);
1847   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1848   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1849   mat->ops->destroy = MatDestroy_MPIAIJ;
1850   mat->ops->view    = MatView_MPIAIJ;
1851   mat->factor       = matin->factor;
1852   mat->assembled    = PETSC_TRUE;
1853   mat->insertmode   = NOT_SET_VALUES;
1854 
1855   a->m = mat->m   = oldmat->m;
1856   a->n = mat->n   = oldmat->n;
1857   a->M = mat->M   = oldmat->M;
1858   a->N = mat->N   = oldmat->N;
1859 
1860   a->rstart       = oldmat->rstart;
1861   a->rend         = oldmat->rend;
1862   a->cstart       = oldmat->cstart;
1863   a->cend         = oldmat->cend;
1864   a->size         = oldmat->size;
1865   a->rank         = oldmat->rank;
1866   a->donotstash   = oldmat->donotstash;
1867   a->roworiented  = oldmat->roworiented;
1868   a->rowindices   = 0;
1869   a->rowvalues    = 0;
1870   a->getrowactive = PETSC_FALSE;
1871 
1872   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1873   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1874   a->cowners = a->rowners + a->size + 2;
1875   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1876   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1877   if (oldmat->colmap) {
1878 #if defined (USE_CTABLE)
1879     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1880 #else
1881     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1882     PLogObjectMemory(mat,(a->N)*sizeof(int));
1883     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1884 #endif
1885   } else a->colmap = 0;
1886   if (oldmat->garray) {
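    /* garray[i] is the global column number of local column i of the off-diagonal block */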
1887     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1888     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1889     PLogObjectMemory(mat,len*sizeof(int));
1890     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1891   } else a->garray = 0;
1892 
1893   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1894   PLogObjectParent(mat,a->lvec);
1895   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1896   PLogObjectParent(mat,a->Mvctx);
1897   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1898   PLogObjectParent(mat,a->A);
1899   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1900   PLogObjectParent(mat,a->B);
1901   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1902   if (flg) {
1903     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1904   }
1905   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1906   *newmat = mat;
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #include "sys.h"
1911 
1912 #undef __FUNC__
1913 #define __FUNC__ "MatLoad_MPIAIJ"
1914 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1915 {
1916   Mat          A;
1917   Scalar       *vals,*svals;
1918   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1919   MPI_Status   status;
1920   int          i, nz, ierr, j,rstart, rend, fd;
1921   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1922   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1923   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1924 
1925   PetscFunctionBegin;
1926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1927   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1928   if (!rank) {
1929     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1930     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1931     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1932     if (header[3] < 0) {
1933       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1934     }
1935   }
1936 
1937   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1938   M = header[1]; N = header[2];
1939   /* determine ownership of all rows */
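  /* each process gets M/size rows; the first M%size processes get one extra row */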
1940   m = M/size + ((M % size) > rank);
1941   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1942   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1943   rowners[0] = 0;
1944   for ( i=2; i<=size; i++ ) {
1945     rowners[i] += rowners[i-1];
1946   }
1947   rstart = rowners[rank];
1948   rend   = rowners[rank+1];
1949 
1950   /* distribute row lengths to all processors */
1951   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1952   offlens = ourlens + (rend-rstart);
1953   if (!rank) {
1954     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1955     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1956     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1957     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1958     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1959     PetscFree(sndcounts);
1960   } else {
1961     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1962   }
1963 
1964   if (!rank) {
1965     /* calculate the number of nonzeros on each processor */
1966     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1967     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1968     for ( i=0; i<size; i++ ) {
1969       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1970         procsnz[i] += rowlengths[j];
1971       }
1972     }
1973     PetscFree(rowlengths);
1974 
1975     /* determine max buffer needed and allocate it */
1976     maxnz = 0;
1977     for ( i=0; i<size; i++ ) {
1978       maxnz = PetscMax(maxnz,procsnz[i]);
1979     }
1980     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1981 
1982     /* read in my part of the matrix column indices  */
1983     nz     = procsnz[0];
1984     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1985     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1986 
1987     /* read in everyone else's parts and ship them off */
1988     for ( i=1; i<size; i++ ) {
1989       nz   = procsnz[i];
1990       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1991       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1992     }
1993     PetscFree(cols);
1994   } else {
1995     /* determine buffer space needed for message */
1996     nz = 0;
1997     for ( i=0; i<m; i++ ) {
1998       nz += ourlens[i];
1999     }
2000     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2001 
2002     /* receive message of column indices */
2003     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2004     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2005     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2006   }
2007 
2008   /* determine column ownership if matrix is not square */
2009   if (N != M) {
2010     n      = N/size + ((N % size) > rank);
2011     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2012     cstart = cend - n;
2013   } else {
2014     cstart = rstart;
2015     cend   = rend;
2016     n      = cend - cstart;
2017   }
2018 
2019   /* loop over local rows, determining number of off diagonal entries */
2020   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2021   jj = 0;
2022   for ( i=0; i<m; i++ ) {
2023     for ( j=0; j<ourlens[i]; j++ ) {
2024       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2025       jj++;
2026     }
2027   }
2028 
2029   /* create our matrix */
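  /* temporarily convert ourlens from total row lengths to DIAGONAL-part lengths,
     as needed for preallocation; it is restored to total lengths just below, for
     use when inserting the values */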
2030   for ( i=0; i<m; i++ ) {
2031     ourlens[i] -= offlens[i];
2032   }
2033   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2034   A = *newmat;
2035   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2036   for ( i=0; i<m; i++ ) {
2037     ourlens[i] += offlens[i];
2038   }
2039 
2040   if (!rank) {
2041     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2042 
2043     /* read in my part of the matrix numerical values  */
2044     nz   = procsnz[0];
2045     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2046 
2047     /* insert into matrix */
2048     jj      = rstart;
2049     smycols = mycols;
2050     svals   = vals;
2051     for ( i=0; i<m; i++ ) {
2052       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2053       smycols += ourlens[i];
2054       svals   += ourlens[i];
2055       jj++;
2056     }
2057 
2058     /* read in other processors and ship out */
2059     for ( i=1; i<size; i++ ) {
2060       nz   = procsnz[i];
2061       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2062       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2063     }
2064     PetscFree(procsnz);
2065   } else {
2066     /* receive numeric values */
2067     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2068 
2069     /* receive message of values */
2070     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2071     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2072     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2073 
2074     /* insert into matrix */
2075     jj      = rstart;
2076     smycols = mycols;
2077     svals   = vals;
2078     for ( i=0; i<m; i++ ) {
2079       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2080       smycols += ourlens[i];
2081       svals   += ourlens[i];
2082       jj++;
2083     }
2084   }
2085   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
2086 
2087   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNC__
2093 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2094 /*
2095     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2096   on each process, and then the end result built by concatenating the local
2097   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2098 */
2099 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2100 {
2101   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2102   Mat        *local,M, Mreuse;
2103   Scalar     *vwork,*aa;
2104   MPI_Comm   comm = mat->comm;
2105   Mat_SeqAIJ *aij;
2106   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (call ==  MAT_REUSE_MATRIX) {
2113     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2114     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2115     local = &Mreuse;
2116     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2117   } else {
2118     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2119     Mreuse = *local;
2120     PetscFree(local);
2121   }
2122 
2123   /*
2124       m - number of local rows
2125       n - number of columns (same on all processors)
2126       rstart - first row in new global matrix generated
2127   */
2128   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2129   if (call == MAT_INITIAL_MATRIX) {
2130     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2131     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2132     ii  = aij->i;
2133     jj  = aij->j;
2134 
2135     /*
2136         Determine the number of non-zeros in the diagonal and off-diagonal
2137         portions of the matrix in order to do correct preallocation
2138     */
2139 
2140     /* first get start and end of "diagonal" columns */
2141     if (csize == PETSC_DECIDE) {
2142       nlocal = n/size + ((n % size) > rank);
2143     } else {
2144       nlocal = csize;
2145     }
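    /* an inclusive prefix sum of the local column counts gives rend, the end of
       this process's "diagonal" column range */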
2146     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2147     rstart = rend - nlocal;
2148     if (rank == size - 1 && rend != n) {
2149       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2150     }
2151 
2152     /* next, compute all the lengths */
2153     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2154     olens = dlens + m;
2155     for ( i=0; i<m; i++ ) {
2156       jend = ii[i+1] - ii[i];
2157       olen = 0;
2158       dlen = 0;
2159       for ( j=0; j<jend; j++ ) {
2160         if ( *jj < rstart || *jj >= rend) olen++;
2161         else dlen++;
2162         jj++;
2163       }
2164       olens[i] = olen;
2165       dlens[i] = dlen;
2166     }
2167     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2168     PetscFree(dlens);
2169   } else {
2170     int ml,nl;
2171 
2172     M = *newmat;
2173     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2174     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2175     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2176     /*
2177          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2178        rather than the slower MatSetValues().
2179     */
2180     M->was_assembled = PETSC_TRUE;
2181     M->assembled     = PETSC_FALSE;
2182   }
2183   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2184   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2185   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2186   ii  = aij->i;
2187   jj  = aij->j;
2188   aa  = aij->a;
2189   for (i=0; i<m; i++) {
2190     row   = rstart + i;
2191     nz    = ii[i+1] - ii[i];
2192     cwork = jj;     jj += nz;
2193     vwork = aa;     aa += nz;
2194     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2195   }
2196 
2197   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   *newmat = M;
2200 
2201   /* save the submatrix used on this processor for the next (reuse) request */
2202   if (call ==  MAT_INITIAL_MATRIX) {
2203     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2204     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2205   }
2206 
2207   PetscFunctionReturn(0);
2208 }
2209