xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision be0abb6ddea392ceaee217d3645fed7c6ea71822)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.297 1999/06/30 23:51:10 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non-scalable way, since the
19 length of colmap equals the global number of columns.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
44 #define CHUNKSIZE   15
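/*
   The two macros below insert a single (row,col,value) into the local
   SeqAIJ storage of either the diagonal block (A) or the off-diagonal
   block (B).  They locate the column within the row (a short bisection
   followed by a linear scan), add to or overwrite an existing entry,
   and otherwise shift the later entries up and insert a new one.  If
   the row has no free space, the entire (i,j,a) storage is reallocated
   with CHUNKSIZE extra entries reserved for that row.  The macros
   assume the caller has the work variables rp, ap, rmax, nrow, col1,
   low, high, t, _i, ii, N, nonew, shift, and the SeqAIJ pointers set
   up as in MatSetValues_MPIAIJ() below.
*/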
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
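/*
   MatSetValues for the parallel AIJ format: entries whose row is owned
   by this process go either into the diagonal block A (columns in
   [cstart,cend)) or into the off-diagonal block B (all other columns,
   translated through colmap once the matrix has been assembled).  If an
   off-diagonal column is not yet present in colmap and new nonzero
   locations are allowed, the matrix is first disassembled.  Entries for
   rows owned by other processes are stashed and communicated during
   MatAssemblyBegin()/MatAssemblyEnd().
*/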
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some variables required by the macros above */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
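/*
   MatGetValues supports only rows owned by this process.  Columns in
   the local diagonal range are read from the diagonal block; all other
   columns are translated through colmap and checked against garray, and
   entries that are not stored locally are returned as 0.0.
*/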
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
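/*
   MatAssemblyEnd drains the stash of off-process entries (each incoming
   message is unpacked in runs of consecutive entries belonging to the
   same row, which are then set with a single MatSetValues_MPIAIJ()
   call), assembles the diagonal block, checks collectively whether any
   process disassembled (in which case this process must also
   disassemble before reassembling), builds the scatter context for the
   off-diagonal block on the first final assembly, and finally
   assembles the off-diagonal block.
*/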
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so, we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change, then we know that
382      no processor disassembled, and thus we can skip this step
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
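/*
   MatZeroRows: the rows to be zeroed may be named by any process, so
   each process first determines the owner of every global row index in
   the index set, sends those indices to their owners, and receives the
   indices of its own rows that other processes want zeroed.  The local
   rows are then zeroed (optionally setting the diagonal), and finally
   the outstanding sends are completed.
*/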
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
455   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
456   nrecvs = work[rank];
457   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
458   nmax = work[rank];
459   ierr = PetscFree(work);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
463   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
464   for ( i=0; i<nrecvs; i++ ) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
473   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
474   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
475   starts[0] = 0;
476   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   for ( i=0; i<N; i++ ) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ISRestoreIndices(is,&rows);
481 
482   starts[0] = 0;
483   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
484   count = 0;
485   for ( i=0; i<size; i++ ) {
486     if (procs[i]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]  = n;
504     slen += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
511   count = 0;
512   for ( i=0; i<nrecvs; i++ ) {
513     values = rvalues + i*nmax;
514     for ( j=0; j<lens[i]; j++ ) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal, we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would allocate new memory, causing many mallocs and slowing things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for ( i = 0; i < slen; i++ ) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
560     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
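/*
   MatMult computes y = A_d x_local + A_o x_ghost, where A_d is the
   diagonal block a->A and A_o the off-diagonal block a->B.  The scatter
   of the needed ghost values of x into a->lvec is started first, the
   diagonal-block multiply is performed while the messages are in
   flight, and the off-diagonal contribution is added once the scatter
   completes.
*/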
569 #undef __FUNC__
570 #define __FUNC__ "MatMult_MPIAIJ"
571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr,nt;
575 
576   PetscFunctionBegin;
577   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
578   if (nt != a->n) {
579     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
580   }
581   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
582   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
583   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatMultAdd_MPIAIJ"
590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
591 {
592   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
593   int        ierr;
594 
595   PetscFunctionBegin;
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatMultTrans_MPIAIJ"
605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   /* do nondiagonal part */
612   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
613   /* send it on its way */
614   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
615   /* do local part */
616   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
617   /* receive remote parts: note this assumes the values are not actually */
618   /* inserted into yy until the next line, which is true for my implementation */
619   /* but may not always be true. */
620   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
629   int        ierr;
630 
631   PetscFunctionBegin;
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   /* send it on its way */
635   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   /* do local part */
637   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
638   /* receive remote parts: note this assumes the values are not actually */
639   /* inserted into zz until the next line, which is true for my implementation */
640   /* but may not always be true. */
641   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646   This works correctly only for square matrices where the subblock a->A is the
647    true diagonal block of the global matrix
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
652 {
653   int        ierr;
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655 
656   PetscFunctionBegin;
657   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
658   if (a->rstart != a->cstart || a->rend != a->cend) {
659     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
660   }
661   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatScale_MPIAIJ"
667 int MatScale_MPIAIJ(Scalar *aa,Mat A)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   PetscFunctionBegin;
673   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
674   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNC__
679 #define __FUNC__ "MatDestroy_MPIAIJ"
680 int MatDestroy_MPIAIJ(Mat mat)
681 {
682   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686 
687   if (mat->mapping) {
688     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
689   }
690   if (mat->bmapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
692   }
693   if (mat->rmap) {
694     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
695   }
696   if (mat->cmap) {
697     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_USE_LOG)
700   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
701 #endif
702   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
703   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
706 #if defined (PETSC_USE_CTABLE)
707   if (aij->colmap) TableDelete(aij->colmap);
708 #else
709   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
710 #endif
711   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
712   if (aij->lvec)   VecDestroy(aij->lvec);
713   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
714   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
715   ierr = PetscFree(aij);CHKERRQ(ierr);
716   PLogObjectDestroy(mat);
717   PetscHeaderDestroy(mat);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatView_MPIAIJ_Binary"
723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
724 {
725   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
726   int         ierr;
727 
728   PetscFunctionBegin;
729   if (aij->size == 1) {
730     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
731   }
732   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNC__
737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
739 {
740   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
741   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
742   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
743   int         size = aij->size;
744   FILE        *fd;
745   ViewerType  vtype;
746 
747   PetscFunctionBegin;
748   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
749   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
750     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
751     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
752       MatInfo info;
753       int     flg;
754       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
756       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
757       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
758       PetscSequentialPhaseBegin(mat->comm,1);
759       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
760          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
761       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
762          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
763       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
764       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
765       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
766       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
767       fflush(fd);
768       PetscSequentialPhaseEnd(mat->comm,1);
769       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
770       PetscFunctionReturn(0);
771     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
772       PetscFunctionReturn(0);
773     }
774   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
775     Draw       draw;
776     PetscTruth isnull;
777     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
778     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
779   }
780 
781   if (size == 1) {
782     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
783   } else {
784     /* assemble the entire matrix onto first processor. */
785     Mat         A;
786     Mat_SeqAIJ *Aloc;
787     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
788     Scalar      *a;
789 
790     if (!rank) {
791       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     } else {
793       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
794     }
795     PLogObjectParent(mat,A);
796 
797     /* copy over the A part */
798     Aloc = (Mat_SeqAIJ*) aij->A->data;
799     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
800     row = aij->rstart;
801     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
802     for ( i=0; i<m; i++ ) {
803       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
804       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
805     }
806     aj = Aloc->j;
807     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
808 
809     /* copy over the B part */
810     Aloc = (Mat_SeqAIJ*) aij->B->data;
811     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
812     row = aij->rstart;
813     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
814     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
815     for ( i=0; i<m; i++ ) {
816       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
817       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
818     }
819     ierr = PetscFree(ct);CHKERRQ(ierr);
820     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     /*
823        Everyone has to participate in the call that draws the matrix, since the graphics waits are
824        synchronized across all processors that share the Draw object
825     */
826     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
827       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
828     }
829     ierr = MatDestroy(A);CHKERRQ(ierr);
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNC__
835 #define __FUNC__ "MatView_MPIAIJ"
836 int MatView_MPIAIJ(Mat mat,Viewer viewer)
837 {
838   int         ierr;
839   ViewerType  vtype;
840 
841   PetscFunctionBegin;
842   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
843   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
844       PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
845     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
846     /*
847   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
848     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
849     */
850   } else {
851     SETERRQ(1,1,"Viewer type not supported by PETSc object");
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 /*
857     This has to provide several versions.
858 
859      2) a) use only local smoothing, updating outer values only once.
860         b) local smoothing, updating outer values each inner iteration
861      3) coloring, updating outer values between colors.
862 */
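/*
   The "local" variants below relax only the locally owned rows: with
   SOR_ZERO_INITIAL_GUESS the diagonal block's relax routine is called
   directly; otherwise the ghost values of xx are scattered into
   mat->lvec once and held fixed for all inner iterations, so the
   off-diagonal block contributes a frozen correction to each sweep.
   Assuming SPARSEDENSEMDOT subtracts the sparse row dot product from
   sum, each update is the usual SOR step
       x_i <- (1-omega)*x_i + omega*(b_i - sum_{j!=i} a_ij x_j)/(a_ii + fshift)
*/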
863 #undef __FUNC__
864 #define __FUNC__ "MatRelax_MPIAIJ"
865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
866                            double fshift,int its,Vec xx)
867 {
868   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
869   Mat        AA = mat->A, BB = mat->B;
870   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
871   Scalar     *b,*x,*xs,*ls,d,*v,sum;
872   int        ierr,*idx, *diag;
873   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
874 
875   PetscFunctionBegin;
876   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
877   ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
878   ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
879   xs = x + shift; /* shift by one for index start of 1 */
880   ls = ls + shift;
881   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
882   diag = A->diag;
883   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
884     if (flag & SOR_ZERO_INITIAL_GUESS) {
885       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
886       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
888       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
889       PetscFunctionReturn(0);
890     }
891     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
892     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
893     while (its--) {
894       /* go down through the rows */
895       for ( i=0; i<m; i++ ) {
896         n    = A->i[i+1] - A->i[i];
897 	PLogFlops(4*n+3);
898         idx  = A->j + A->i[i] + shift;
899         v    = A->a + A->i[i] + shift;
900         sum  = b[i];
901         SPARSEDENSEMDOT(sum,xs,v,idx,n);
902         d    = fshift + A->a[diag[i]+shift];
903         n    = B->i[i+1] - B->i[i];
904         idx  = B->j + B->i[i] + shift;
905         v    = B->a + B->i[i] + shift;
906         SPARSEDENSEMDOT(sum,ls,v,idx,n);
907         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
908       }
909       /* come up through the rows */
910       for ( i=m-1; i>-1; i-- ) {
911         n    = A->i[i+1] - A->i[i];
912 	PLogFlops(4*n+3);
913         idx  = A->j + A->i[i] + shift;
914         v    = A->a + A->i[i] + shift;
915         sum  = b[i];
916         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917         d    = fshift + A->a[diag[i]+shift];
918         n    = B->i[i+1] - B->i[i];
919         idx  = B->j + B->i[i] + shift;
920         v    = B->a + B->i[i] + shift;
921         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
923       }
924     }
925   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
926     if (flag & SOR_ZERO_INITIAL_GUESS) {
927       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
928       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
930       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
931       PetscFunctionReturn(0);
932     }
933     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
934     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
935     while (its--) {
936       for ( i=0; i<m; i++ ) {
937         n    = A->i[i+1] - A->i[i];
938 	PLogFlops(4*n+3);
939         idx  = A->j + A->i[i] + shift;
940         v    = A->a + A->i[i] + shift;
941         sum  = b[i];
942         SPARSEDENSEMDOT(sum,xs,v,idx,n);
943         d    = fshift + A->a[diag[i]+shift];
944         n    = B->i[i+1] - B->i[i];
945         idx  = B->j + B->i[i] + shift;
946         v    = B->a + B->i[i] + shift;
947         SPARSEDENSEMDOT(sum,ls,v,idx,n);
948         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
949       }
950     }
951   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
952     if (flag & SOR_ZERO_INITIAL_GUESS) {
953       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
954       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
955       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
956       ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
957       PetscFunctionReturn(0);
958     }
959     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
960     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
961     while (its--) {
962       for ( i=m-1; i>-1; i-- ) {
963         n    = A->i[i+1] - A->i[i];
964 	PLogFlops(4*n+3);
965         idx  = A->j + A->i[i] + shift;
966         v    = A->a + A->i[i] + shift;
967         sum  = b[i];
968         SPARSEDENSEMDOT(sum,xs,v,idx,n);
969         d    = fshift + A->a[diag[i]+shift];
970         n    = B->i[i+1] - B->i[i];
971         idx  = B->j + B->i[i] + shift;
972         v    = B->a + B->i[i] + shift;
973         SPARSEDENSEMDOT(sum,ls,v,idx,n);
974         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
975       }
976     }
977   } else {
978     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
979   }
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
982   ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNC__
987 #define __FUNC__ "MatGetInfo_MPIAIJ"
988 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
989 {
990   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
991   Mat        A = mat->A, B = mat->B;
992   int        ierr;
993   double     isend[5], irecv[5];
994 
995   PetscFunctionBegin;
996   info->block_size     = 1.0;
997   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
998   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
999   isend[3] = info->memory;  isend[4] = info->mallocs;
1000   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1001   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1002   isend[3] += info->memory;  isend[4] += info->mallocs;
1003   if (flag == MAT_LOCAL) {
1004     info->nz_used      = isend[0];
1005     info->nz_allocated = isend[1];
1006     info->nz_unneeded  = isend[2];
1007     info->memory       = isend[3];
1008     info->mallocs      = isend[4];
1009   } else if (flag == MAT_GLOBAL_MAX) {
1010     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1011     info->nz_used      = irecv[0];
1012     info->nz_allocated = irecv[1];
1013     info->nz_unneeded  = irecv[2];
1014     info->memory       = irecv[3];
1015     info->mallocs      = irecv[4];
1016   } else if (flag == MAT_GLOBAL_SUM) {
1017     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1018     info->nz_used      = irecv[0];
1019     info->nz_allocated = irecv[1];
1020     info->nz_unneeded  = irecv[2];
1021     info->memory       = irecv[3];
1022     info->mallocs      = irecv[4];
1023   }
1024   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1025   info->fill_ratio_needed = 0;
1026   info->factor_mallocs    = 0;
1027   info->rows_global       = (double)mat->M;
1028   info->columns_global    = (double)mat->N;
1029   info->rows_local        = (double)mat->m;
1030   info->columns_local     = (double)mat->n;
1031 
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 #undef __FUNC__
1036 #define __FUNC__ "MatSetOption_MPIAIJ"
1037 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1038 {
1039   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1040   int        ierr;
1041 
1042   PetscFunctionBegin;
1043   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1044       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1045       op == MAT_COLUMNS_UNSORTED ||
1046       op == MAT_COLUMNS_SORTED ||
1047       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1048       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1049         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1050         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1051   } else if (op == MAT_ROW_ORIENTED) {
1052     a->roworiented = 1;
1053     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1054     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1055   } else if (op == MAT_ROWS_SORTED ||
1056              op == MAT_ROWS_UNSORTED ||
1057              op == MAT_SYMMETRIC ||
1058              op == MAT_STRUCTURALLY_SYMMETRIC ||
1059              op == MAT_YES_NEW_DIAGONALS)
1060     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1061   else if (op == MAT_COLUMN_ORIENTED) {
1062     a->roworiented = 0;
1063     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1064     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1065   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1066     a->donotstash = 1;
1067   } else if (op == MAT_NO_NEW_DIAGONALS){
1068     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1069   } else {
1070     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1071   }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNC__
1076 #define __FUNC__ "MatGetSize_MPIAIJ"
1077 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1078 {
1079   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1080 
1081   PetscFunctionBegin;
1082   if (m) *m = mat->M;
1083   if (n) *n = mat->N;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNC__
1088 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1089 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1090 {
1091   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1092 
1093   PetscFunctionBegin;
1094   if (m) *m = mat->m;
1095   if (n) *n = mat->n;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1101 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1104 
1105   PetscFunctionBegin;
1106   *m = mat->rstart; *n = mat->rend;
1107   PetscFunctionReturn(0);
1108 }
1109 
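/*
   MatGetRow returns a locally owned row as a single array of global
   column indices and values: the off-diagonal entries whose global
   columns (through garray) lie below cstart come first, then the
   diagonal-block entries shifted by cstart, then the remaining
   off-diagonal entries, so the result is sorted by increasing global
   column provided both blocks store their rows sorted.
*/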
1110 #undef __FUNC__
1111 #define __FUNC__ "MatGetRow_MPIAIJ"
1112 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1113 {
1114   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1115   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1116   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1117   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1118   int        *cmap, *idx_p;
1119 
1120   PetscFunctionBegin;
1121   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1122   mat->getrowactive = PETSC_TRUE;
1123 
1124   if (!mat->rowvalues && (idx || v)) {
1125     /*
1126         allocate enough space to hold information from the longest row.
1127     */
1128     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1129     int     max = 1,m = mat->m,tmp;
1130     for ( i=0; i<m; i++ ) {
1131       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1132       if (max < tmp) { max = tmp; }
1133     }
1134     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1135     mat->rowindices = (int *) (mat->rowvalues + max);
1136   }
1137 
1138   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
1139   lrow = row - rstart;
1140 
1141   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1142   if (!v)   {pvA = 0; pvB = 0;}
1143   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1144   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1145   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1146   nztot = nzA + nzB;
1147 
1148   cmap  = mat->garray;
1149   if (v  || idx) {
1150     if (nztot) {
1151       /* Sort by increasing column numbers, assuming A and B already sorted */
1152       int imark = -1;
1153       if (v) {
1154         *v = v_p = mat->rowvalues;
1155         for ( i=0; i<nzB; i++ ) {
1156           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1157           else break;
1158         }
1159         imark = i;
1160         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1161         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1162       }
1163       if (idx) {
1164         *idx = idx_p = mat->rowindices;
1165         if (imark > -1) {
1166           for ( i=0; i<imark; i++ ) {
1167             idx_p[i] = cmap[cworkB[i]];
1168           }
1169         } else {
1170           for ( i=0; i<nzB; i++ ) {
1171             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1172             else break;
1173           }
1174           imark = i;
1175         }
1176         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1177         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1178       }
1179     } else {
1180       if (idx) *idx = 0;
1181       if (v)   *v   = 0;
1182     }
1183   }
1184   *nz = nztot;
1185   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1186   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNC__
1191 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1192 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1193 {
1194   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1195 
1196   PetscFunctionBegin;
1197   if (aij->getrowactive == PETSC_FALSE) {
1198     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1199   }
1200   aij->getrowactive = PETSC_FALSE;
1201   PetscFunctionReturn(0);
1202 }
1203 
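/*
   Norms are computed from the two local blocks and reduced over the
   communicator: the Frobenius norm as the square root of the global sum
   of squares, the 1-norm as the maximum column sum accumulated in a
   work array of global length N (not scalable), and the infinity norm
   as the maximum local row sum reduced with MPI_MAX.
*/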
1204 #undef __FUNC__
1205 #define __FUNC__ "MatNorm_MPIAIJ"
1206 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1207 {
1208   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1209   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1210   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1211   double     sum = 0.0;
1212   Scalar     *v;
1213 
1214   PetscFunctionBegin;
1215   if (aij->size == 1) {
1216     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1217   } else {
1218     if (type == NORM_FROBENIUS) {
1219       v = amat->a;
1220       for (i=0; i<amat->nz; i++ ) {
1221 #if defined(PETSC_USE_COMPLEX)
1222         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1223 #else
1224         sum += (*v)*(*v); v++;
1225 #endif
1226       }
1227       v = bmat->a;
1228       for (i=0; i<bmat->nz; i++ ) {
1229 #if defined(PETSC_USE_COMPLEX)
1230         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1231 #else
1232         sum += (*v)*(*v); v++;
1233 #endif
1234       }
1235       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1236       *norm = sqrt(*norm);
1237     } else if (type == NORM_1) { /* max column norm */
1238       double *tmp, *tmp2;
1239       int    *jj, *garray = aij->garray;
1240       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1241       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1242       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1243       *norm = 0.0;
1244       v = amat->a; jj = amat->j;
1245       for ( j=0; j<amat->nz; j++ ) {
1246         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1247       }
1248       v = bmat->a; jj = bmat->j;
1249       for ( j=0; j<bmat->nz; j++ ) {
1250         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1251       }
1252       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1253       for ( j=0; j<aij->N; j++ ) {
1254         if (tmp2[j] > *norm) *norm = tmp2[j];
1255       }
1256       ierr = PetscFree(tmp);CHKERRQ(ierr);
1257       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1258     } else if (type == NORM_INFINITY) { /* max row norm */
1259       double ntemp = 0.0;
1260       for ( j=0; j<amat->m; j++ ) {
1261         v = amat->a + amat->i[j] + shift;
1262         sum = 0.0;
1263         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1264           sum += PetscAbsScalar(*v); v++;
1265         }
1266         v = bmat->a + bmat->i[j] + shift;
1267         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1268           sum += PetscAbsScalar(*v); v++;
1269         }
1270         if (sum > ntemp) ntemp = sum;
1271       }
1272       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1273     } else {
1274       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1275     }
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
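/*
   MatTranspose builds an explicit transpose by inserting the columns of
   each local row of the diagonal and off-diagonal blocks as rows of a
   new N x M parallel matrix.  The "in-place" variant (matout ==
   PETSC_NULL) still builds the new matrix and then copies its header
   over the original, keeping only the original ops tables.
*/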
1280 #undef __FUNC__
1281 #define __FUNC__ "MatTranspose_MPIAIJ"
1282 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1283 {
1284   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1285   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1286   int        ierr,shift = Aloc->indexshift;
1287   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1288   Mat        B;
1289   Scalar     *array;
1290 
1291   PetscFunctionBegin;
1292   if (matout == PETSC_NULL && M != N) {
1293     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1294   }
1295 
1296   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1297 
1298   /* copy over the A part */
1299   Aloc = (Mat_SeqAIJ*) a->A->data;
1300   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1301   row = a->rstart;
1302   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1303   for ( i=0; i<m; i++ ) {
1304     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1305     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1306   }
1307   aj = Aloc->j;
1308   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1309 
1310   /* copy over the B part */
1311   Aloc = (Mat_SeqAIJ*) a->B->data;
1312   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1313   row = a->rstart;
1314   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1315   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1316   for ( i=0; i<m; i++ ) {
1317     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1318     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1319   }
1320   ierr = PetscFree(ct);CHKERRQ(ierr);
1321   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1323   if (matout != PETSC_NULL) {
1324     *matout = B;
1325   } else {
1326     PetscOps       *Abops;
1327     struct _MatOps *Aops;
1328 
1329     /* This isn't really an in-place transpose ... but we free the data structures from a */
1330     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1331     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1332     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1333 #if defined (PETSC_USE_CTABLE)
1334     if (a->colmap) TableDelete(a->colmap);
1335 #else
1336     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1337 #endif
1338     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1339     if (a->lvec) VecDestroy(a->lvec);
1340     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1341     ierr = PetscFree(a);CHKERRQ(ierr);
1342 
1343     /*
1344        This is horrible, horrible code. We need to keep the
1345       A pointers for the bops and ops but copy everything
1346       else from B.
1347     */
1348     Abops   = A->bops;
1349     Aops    = A->ops;
1350     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1351     A->bops = Abops;
1352     A->ops  = Aops;
1353     PetscHeaderDestroy(B);
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNC__
1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1361 {
1362   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1363   Mat a = aij->A, b = aij->B;
1364   int ierr,s1,s2,s3;
1365 
1366   PetscFunctionBegin;
1367   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1368   if (rr) {
1369     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1370     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1371     /* Overlap communication with computation. */
1372     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1373   }
1374   if (ll) {
1375     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1376     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1377     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1378   }
1379   /* scale  the diagonal block */
1380   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1381 
1382   if (rr) {
1383     /* Do a scatter end and then right scale the off-diagonal block */
1384     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1385     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1386   }
1387 
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1394 int MatPrintHelp_MPIAIJ(Mat A)
1395 {
1396   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1397   int        ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!a->rank) {
1401     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNC__
1407 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1408 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1409 {
1410   PetscFunctionBegin;
1411   *bs = 1;
1412   PetscFunctionReturn(0);
1413 }
1414 #undef __FUNC__
1415 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1416 int MatSetUnfactored_MPIAIJ(Mat A)
1417 {
1418   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1419   int        ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNC__
1427 #define __FUNC__ "MatEqual_MPIAIJ"
1428 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1429 {
1430   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1431   Mat        a, b, c, d;
1432   PetscTruth flg;
1433   int        ierr;
1434 
1435   PetscFunctionBegin;
1436   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1437   a = matA->A; b = matA->B;
1438   c = matB->A; d = matB->B;
1439 
1440   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1441   if (flg == PETSC_TRUE) {
1442     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1443   }
1444   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNC__
1449 #define __FUNC__ "MatCopy_MPIAIJ"
1450 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1451 {
1452   int        ierr;
1453   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1454   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1455 
1456   PetscFunctionBegin;
1457   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1458     /* Because of the column compression in the off-processor part of the matrix a->B,
1459        the number of columns in a->B and b->B may differ, hence we cannot call
1460        MatCopy() directly on the two parts. If need be, we could provide a copy more
1461        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1462        then copying the submatrices */
1463     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1464   } else {
1465     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1466     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1472 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1473 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1474 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1475 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1476 
1477 /* -------------------------------------------------------------------*/
1478 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1479        MatGetRow_MPIAIJ,
1480        MatRestoreRow_MPIAIJ,
1481        MatMult_MPIAIJ,
1482        MatMultAdd_MPIAIJ,
1483        MatMultTrans_MPIAIJ,
1484        MatMultTransAdd_MPIAIJ,
1485        0,
1486        0,
1487        0,
1488        0,
1489        0,
1490        0,
1491        MatRelax_MPIAIJ,
1492        MatTranspose_MPIAIJ,
1493        MatGetInfo_MPIAIJ,
1494        MatEqual_MPIAIJ,
1495        MatGetDiagonal_MPIAIJ,
1496        MatDiagonalScale_MPIAIJ,
1497        MatNorm_MPIAIJ,
1498        MatAssemblyBegin_MPIAIJ,
1499        MatAssemblyEnd_MPIAIJ,
1500        0,
1501        MatSetOption_MPIAIJ,
1502        MatZeroEntries_MPIAIJ,
1503        MatZeroRows_MPIAIJ,
1504        0,
1505        0,
1506        0,
1507        0,
1508        MatGetSize_MPIAIJ,
1509        MatGetLocalSize_MPIAIJ,
1510        MatGetOwnershipRange_MPIAIJ,
1511        0,
1512        0,
1513        0,
1514        0,
1515        MatDuplicate_MPIAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520        0,
1521        MatGetSubMatrices_MPIAIJ,
1522        MatIncreaseOverlap_MPIAIJ,
1523        MatGetValues_MPIAIJ,
1524        MatCopy_MPIAIJ,
1525        MatPrintHelp_MPIAIJ,
1526        MatScale_MPIAIJ,
1527        0,
1528        0,
1529        0,
1530        MatGetBlockSize_MPIAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535        MatFDColoringCreate_MPIAIJ,
1536        0,
1537        MatSetUnfactored_MPIAIJ,
1538        0,
1539        0,
1540        MatGetSubMatrix_MPIAIJ,
1541        0,
1542        0,
1543        MatGetMaps_Petsc};
1544 
1545 /* ----------------------------------------------------------------------------------------*/
1546 
1547 EXTERN_C_BEGIN
1548 #undef __FUNC__
1549 #define __FUNC__ "MatStoreValues_MPIAIJ"
1550 int MatStoreValues_MPIAIJ(Mat mat)
1551 {
1552   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1553   int        ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1557   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNC__
1564 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1565 int MatRetrieveValues_MPIAIJ(Mat mat)
1566 {
1567   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1568   int        ierr;
1569 
1570   PetscFunctionBegin;
1571   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1572   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 #include "pc.h"
1578 EXTERN_C_BEGIN
1579 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1580 EXTERN_C_END
1581 
1582 #undef __FUNC__
1583 #define __FUNC__ "MatCreateMPIAIJ"
1584 /*@C
1585    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1586    (the default parallel PETSc format).  For good matrix assembly performance
1587    the user should preallocate the matrix storage by setting the parameters
1588    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1589    performance can be increased by more than a factor of 50.
1590 
1591    Collective on MPI_Comm
1592 
1593    Input Parameters:
1594 +  comm - MPI communicator
1595 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1596            This value should be the same as the local size used in creating the
1597            y vector for the matrix-vector product y = Ax.
1598 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1599        This value should be the same as the local size used in creating the
1600        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
1601 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1602 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1603 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1604            (same value is used for all local rows)
1605 .  d_nnz - array containing the number of nonzeros in the various rows of the
1606            DIAGONAL portion of the local submatrix (possibly different for each row)
1607            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1608            The size of this array is equal to the number of local rows, i.e. 'm'.
1609            You must leave room for the diagonal entry even if it is zero.
1610 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1611            submatrix (same value is used for all local rows).
1612 -  o_nnz - array containing the number of nonzeros in the various rows of the
1613            OFF-DIAGONAL portion of the local submatrix (possibly different for
1614            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1615            structure. The size of this array is equal to the number
1616            of local rows, i.e. 'm'.
1617 
1618    Output Parameter:
1619 .  A - the matrix
1620 
1621    Notes:
1622    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1623    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1624    storage requirements for this matrix.
1625 
1626    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
1627    processor then it must be used on all processors that share the object for
1628    that argument.
1629 
1630    The AIJ format (also called the Yale sparse matrix format or
1631    compressed row storage), is fully compatible with standard Fortran 77
1632    storage.  That is, the stored row and column indices can begin at
1633    either one (as in Fortran) or zero.  See the users manual for details.
1634 
1635    The user MUST specify either the local or global matrix dimensions
1636    (possibly both).
1637 
1638    The parallel matrix is partitioned such that the first m0 rows belong to
1639    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1640    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1641 
1642    The DIAGONAL portion of the local submatrix of a processor can be defined
1643    as the submatrix which is obtained by extracting the part corresponding
1644    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1645    first row that belongs to the processor, and r2 is the last row belonging
1646    to this processor. This is a square mxm matrix. The remaining portion
1647    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1648 
1649    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
1650 
1651    By default, this format uses inodes (identical nodes) when possible.
1652    We search for consecutive rows with the same nonzero structure, thereby
1653    reusing matrix information to achieve increased efficiency.
1654 
1655    Options Database Keys:
1656 +  -mat_aij_no_inode  - Do not use inodes
1657 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1658 .  -mat_aij_oneindex - Internally use indexing starting at 1
1659         rather than 0.  Note that when calling MatSetValues(),
1660         the user still MUST index entries starting at 0!
1661 -  -mat_mpi - use the parallel matrix data structures even on one processor
1662               (defaults to using SeqAIJ format on one processor)
1665 
1666 
1667    Example usage:
1668 
1669    Consider the following 8x8 matrix with 34 non-zero values, which is
1670    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1671    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
1672    as follows:
1673 
1674 .vb
1675             1  2  0  |  0  3  0  |  0  4
1676     Proc0   0  5  6  |  7  0  0  |  8  0
1677             9  0 10  | 11  0  0  | 12  0
1678     -------------------------------------
1679            13  0 14  | 15 16 17  |  0  0
1680     Proc1   0 18  0  | 19 20 21  |  0  0
1681             0  0  0  | 22 23  0  | 24  0
1682     -------------------------------------
1683     Proc2  25 26 27  |  0  0 28  | 29  0
1684            30  0  0  | 31 32 33  |  0 34
1685 .ve
1686 
1687    This can be represented as a collection of submatrices as:
1688 
1689 .vb
1690       A B C
1691       D E F
1692       G H I
1693 .ve
1694 
1695    Where the submatrices A,B,C are owned by proc0, D,E,F are
1696    owned by proc1, G,H,I are owned by proc2.
1697 
1698    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1699    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1700    The 'M','N' parameters are 8,8, and have the same values on all procs.
1701 
1702    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1703    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1704    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1705    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1706    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
1707    matrix, and [DF] as another SeqAIJ matrix.
1708 
1709    When d_nz, o_nz parameters are specified, d_nz storage elements are
1710    allocated for every row of the local diagonal submatrix, and o_nz
1711    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
1712    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
1713    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
1714    In this case, the values of d_nz,o_nz are:
1715 .vb
1716      proc0 : d_nz = 2, o_nz = 2
1717      proc1 : d_nz = 3, o_nz = 2
1718      proc2 : d_nz = 1, o_nz = 4
1719 .ve
1720    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1721    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1722    for proc2, i.e. we are using 12+15+10=37 storage locations to store
1723    34 values.
1724 
1725    When d_nnz, o_nnz parameters are specified, the storage is specified
1726    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1727    In the above case the values for d_nnz,o_nnz are:
1728 .vb
1729      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1730      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1731      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1732 .ve
1733    Here the space allocated is the sum of all the above values, i.e. 34, and
1734    hence the preallocation is perfect.
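
   For illustration, each process passes its own local sizes and nnz arrays in the
   (collective) call; the sketch below shows what proc0 in the example above would
   pass (hypothetical variable names, abbreviated error handling):

.vb
     Mat A;
     int ierr, d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* proc0 values from the example */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve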
1735 
1736    Level: intermediate
1737 
1738 .keywords: matrix, aij, compressed row, sparse, parallel
1739 
1740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1741 @*/
1742 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1743 {
1744   Mat          B;
1745   Mat_MPIAIJ   *b;
1746   int          ierr, i,size,flag1 = 0, flag2 = 0;
1747 
1748   PetscFunctionBegin;
1749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1750   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1751   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1752   if (!flag1 && !flag2 && size == 1) {
1753     if (M == PETSC_DECIDE) M = m;
1754     if (N == PETSC_DECIDE) N = n;
1755     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1756     PetscFunctionReturn(0);
1757   }
1758 
1759   *A = 0;
1760   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1761   PLogObjectCreate(B);
1762   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1763   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1764   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1765   B->ops->destroy = MatDestroy_MPIAIJ;
1766   B->ops->view    = MatView_MPIAIJ;
1767   B->factor       = 0;
1768   B->assembled    = PETSC_FALSE;
1769   B->mapping      = 0;
1770 
1771   B->insertmode      = NOT_SET_VALUES;
1772   b->size            = size;
1773   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1774 
1775   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1776     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1777   }
1778 
1779   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1780   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1781   b->m = m; B->m = m;
1782   b->n = n; B->n = n;
1783   b->N = N; B->N = N;
1784   b->M = M; B->M = M;
1785 
1786   /* the information in the maps duplicates the information computed below, eventually
1787      we should remove the duplicate information that is not contained in the maps */
1788   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1789   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1790 
1791   /* build local table of row and column ownerships */
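  /* after the Allgather and running sum below, rowners[i] holds the first global row owned by
     process i (and rowners[size] == M); e.g., local sizes 3,3,2 give rowners = {0,3,6,8} */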
1792   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1793   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1794   b->cowners = b->rowners + b->size + 2;
1795   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1796   b->rowners[0] = 0;
1797   for ( i=2; i<=b->size; i++ ) {
1798     b->rowners[i] += b->rowners[i-1];
1799   }
1800   b->rstart = b->rowners[b->rank];
1801   b->rend   = b->rowners[b->rank+1];
1802   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1803   b->cowners[0] = 0;
1804   for ( i=2; i<=b->size; i++ ) {
1805     b->cowners[i] += b->cowners[i-1];
1806   }
1807   b->cstart = b->cowners[b->rank];
1808   b->cend   = b->cowners[b->rank+1];
1809 
1810   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1811   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1812   PLogObjectParent(B,b->A);
1813   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1814   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1815   PLogObjectParent(B,b->B);
1816 
1817   /* build cache for off-processor entries formed during assembly */
1818   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1819   b->donotstash  = 0;
1820   b->colmap      = 0;
1821   b->garray      = 0;
1822   b->roworiented = 1;
1823 
1824   /* stuff used for matrix vector multiply */
1825   b->lvec      = 0;
1826   b->Mvctx     = 0;
1827 
1828   /* stuff for MatGetRow() */
1829   b->rowindices   = 0;
1830   b->rowvalues    = 0;
1831   b->getrowactive = PETSC_FALSE;
1832 
1833   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1834                                      "MatStoreValues_MPIAIJ",
1835                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1836   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1837                                      "MatRetrieveValues_MPIAIJ",
1838                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1839   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1840                                      "MatGetDiagonalBlock_MPIAIJ",
1841                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1842   *A = B;
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNC__
1847 #define __FUNC__ "MatDuplicate_MPIAIJ"
1848 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1849 {
1850   Mat        mat;
1851   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1852   int        ierr, len=0, flg;
1853 
1854   PetscFunctionBegin;
1855   *newmat       = 0;
1856   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1857   PLogObjectCreate(mat);
1858   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1859   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1860   mat->ops->destroy = MatDestroy_MPIAIJ;
1861   mat->ops->view    = MatView_MPIAIJ;
1862   mat->factor       = matin->factor;
1863   mat->assembled    = PETSC_TRUE;
1864   mat->insertmode   = NOT_SET_VALUES;
1865 
1866   a->m = mat->m   = oldmat->m;
1867   a->n = mat->n   = oldmat->n;
1868   a->M = mat->M   = oldmat->M;
1869   a->N = mat->N   = oldmat->N;
1870 
1871   a->rstart       = oldmat->rstart;
1872   a->rend         = oldmat->rend;
1873   a->cstart       = oldmat->cstart;
1874   a->cend         = oldmat->cend;
1875   a->size         = oldmat->size;
1876   a->rank         = oldmat->rank;
1877   a->donotstash   = oldmat->donotstash;
1878   a->roworiented  = oldmat->roworiented;
1879   a->rowindices   = 0;
1880   a->rowvalues    = 0;
1881   a->getrowactive = PETSC_FALSE;
1882 
1883   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1884   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1885   a->cowners = a->rowners + a->size + 2;
1886   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1887   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1888   if (oldmat->colmap) {
1889 #if defined (PETSC_USE_CTABLE)
1890     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1891 #else
1892     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1893     PLogObjectMemory(mat,(a->N)*sizeof(int));
1894     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1895 #endif
1896   } else a->colmap = 0;
1897   if (oldmat->garray) {
1898     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1899     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1900     PLogObjectMemory(mat,len*sizeof(int));
1901     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1902   } else a->garray = 0;
1903 
1904   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1905   PLogObjectParent(mat,a->lvec);
1906   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1907   PLogObjectParent(mat,a->Mvctx);
1908   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1909   PLogObjectParent(mat,a->A);
1910   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1911   PLogObjectParent(mat,a->B);
1912   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1913   if (flg) {
1914     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1915   }
1916   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1917   *newmat = mat;
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #include "sys.h"
1922 
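/*
   A sketch of how a caller typically reaches this routine through MatLoad()
   (hypothetical file name, abbreviated error handling; assumes the binary viewer interface):

      Viewer viewer;
      Mat    A;
      ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/
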
1923 #undef __FUNC__
1924 #define __FUNC__ "MatLoad_MPIAIJ"
1925 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1926 {
1927   Mat          A;
1928   Scalar       *vals,*svals;
1929   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1930   MPI_Status   status;
1931   int          i, nz, ierr, j,rstart, rend, fd;
1932   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1933   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1934   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1935 
1936   PetscFunctionBegin;
1937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1938   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1939   if (!rank) {
1940     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1941     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1942     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1943     if (header[3] < 0) {
1944       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1945     }
1946   }
1947 
1948   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1949   M = header[1]; N = header[2];
1950   /* determine ownership of all rows */
1951   m = M/size + ((M % size) > rank);
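  /* e.g., M = 8 rows on size = 3 processes gives local sizes 3,3,2 for ranks 0,1,2 */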
1952   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1953   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1954   rowners[0] = 0;
1955   for ( i=2; i<=size; i++ ) {
1956     rowners[i] += rowners[i-1];
1957   }
1958   rstart = rowners[rank];
1959   rend   = rowners[rank+1];
1960 
1961   /* distribute row lengths to all processors */
1962   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1963   offlens = ourlens + (rend-rstart);
1964   if (!rank) {
1965     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1966     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1967     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
1968     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1969     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1970     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1971   } else {
1972     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
1973   }
1974 
1975   if (!rank) {
1976     /* calculate the number of nonzeros on each processor */
1977     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
1978     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1979     for ( i=0; i<size; i++ ) {
1980       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1981         procsnz[i] += rowlengths[j];
1982       }
1983     }
1984     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1985 
1986     /* determine max buffer needed and allocate it */
1987     maxnz = 0;
1988     for ( i=0; i<size; i++ ) {
1989       maxnz = PetscMax(maxnz,procsnz[i]);
1990     }
1991     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
1992 
1993     /* read in my part of the matrix column indices  */
1994     nz     = procsnz[0];
1995     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
1996     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1997 
1998     /* read in everyone else's and ship them off */
1999     for ( i=1; i<size; i++ ) {
2000       nz   = procsnz[i];
2001       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2002       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2003     }
2004     ierr = PetscFree(cols);CHKERRQ(ierr);
2005   } else {
2006     /* determine buffer space needed for message */
2007     nz = 0;
2008     for ( i=0; i<m; i++ ) {
2009       nz += ourlens[i];
2010     }
2011     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2012 
2013     /* receive message of column indices*/
2014     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2015     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2016     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2017   }
2018 
2019   /* determine column ownership if matrix is not square */
2020   if (N != M) {
2021     n      = N/size + ((N % size) > rank);
2022     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2023     cstart = cend - n;
2024   } else {
2025     cstart = rstart;
2026     cend   = rend;
2027     n      = cend - cstart;
2028   }
2029 
2030   /* loop over local rows, determining number of off diagonal entries */
2031   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2032   jj = 0;
2033   for ( i=0; i<m; i++ ) {
2034     for ( j=0; j<ourlens[i]; j++ ) {
2035       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2036       jj++;
2037     }
2038   }
2039 
2040   /* create our matrix */
2041   for ( i=0; i<m; i++ ) {
2042     ourlens[i] -= offlens[i];
2043   }
2044   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2045   A = *newmat;
2046   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2047   for ( i=0; i<m; i++ ) {
2048     ourlens[i] += offlens[i];
2049   }
2050 
2051   if (!rank) {
2052     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2053 
2054     /* read in my part of the matrix numerical values  */
2055     nz   = procsnz[0];
2056     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2057 
2058     /* insert into matrix */
2059     jj      = rstart;
2060     smycols = mycols;
2061     svals   = vals;
2062     for ( i=0; i<m; i++ ) {
2063       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2064       smycols += ourlens[i];
2065       svals   += ourlens[i];
2066       jj++;
2067     }
2068 
2069     /* read in other processors and ship out */
2070     for ( i=1; i<size; i++ ) {
2071       nz   = procsnz[i];
2072       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2073       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2074     }
2075     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2076   } else {
2077     /* receive numeric values */
2078     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2079 
2080     /* receive message of values*/
2081     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2082     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2083     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2084 
2085     /* insert into matrix */
2086     jj      = rstart;
2087     smycols = mycols;
2088     svals   = vals;
2089     for ( i=0; i<m; i++ ) {
2090       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2091       smycols += ourlens[i];
2092       svals   += ourlens[i];
2093       jj++;
2094     }
2095   }
2096   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2097   ierr = PetscFree(vals);CHKERRQ(ierr);
2098   ierr = PetscFree(mycols);CHKERRQ(ierr);
2099   ierr = PetscFree(rowners);CHKERRQ(ierr);
2100 
2101   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2102   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 #undef __FUNC__
2107 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2108 /*
2109     Not great since it makes two copies of the submatrix: first a SeqAIJ
2110   copy on each process, and then the end result is built by concatenating
2111   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2112 */
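
/*
   A sketch of the intended calling pattern through the MatGetSubMatrix() interface
   (isrow/iscol are index sets assumed to have been created by the caller):

      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
      ...  change numerical values of mat, keeping the same nonzero pattern ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&newmat);CHKERRQ(ierr);
*/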
2113 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2114 {
2115   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2116   Mat        *local,M, Mreuse;
2117   Scalar     *vwork,*aa;
2118   MPI_Comm   comm = mat->comm;
2119   Mat_SeqAIJ *aij;
2120   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2121 
2122   PetscFunctionBegin;
2123   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2125 
2126   if (call ==  MAT_REUSE_MATRIX) {
2127     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2128     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2129     local = &Mreuse;
2130     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2131   } else {
2132     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2133     Mreuse = *local;
2134     ierr = PetscFree(local);CHKERRQ(ierr);
2135   }
2136 
2137   /*
2138       m - number of local rows
2139       n - number of columns (same on all processors)
2140       rstart - first row in new global matrix generated
2141   */
2142   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2143   if (call == MAT_INITIAL_MATRIX) {
2144     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2145     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2146     ii  = aij->i;
2147     jj  = aij->j;
2148 
2149     /*
2150         Determine the number of non-zeros in the diagonal and off-diagonal
2151         portions of the matrix in order to do correct preallocation
2152     */
2153 
2154     /* first get start and end of "diagonal" columns */
2155     if (csize == PETSC_DECIDE) {
2156       nlocal = n/size + ((n % size) > rank);
2157     } else {
2158       nlocal = csize;
2159     }
2160     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2161     rstart = rend - nlocal;
2162     if (rank == size - 1 && rend != n) {
2163       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2164     }
2165 
2166     /* next, compute all the lengths */
2167     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2168     olens = dlens + m;
2169     for ( i=0; i<m; i++ ) {
2170       jend = ii[i+1] - ii[i];
2171       olen = 0;
2172       dlen = 0;
2173       for ( j=0; j<jend; j++ ) {
2174         if ( *jj < rstart || *jj >= rend) olen++;
2175         else dlen++;
2176         jj++;
2177       }
2178       olens[i] = olen;
2179       dlens[i] = dlen;
2180     }
2181     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2182     ierr = PetscFree(dlens);CHKERRQ(ierr);
2183   } else {
2184     int ml,nl;
2185 
2186     M = *newmat;
2187     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2188     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2189     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2190     /*
2191          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2192        rather than the slower MatSetValues().
2193     */
2194     M->was_assembled = PETSC_TRUE;
2195     M->assembled     = PETSC_FALSE;
2196   }
2197   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2198   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2199   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2200   ii  = aij->i;
2201   jj  = aij->j;
2202   aa  = aij->a;
2203   for (i=0; i<m; i++) {
2204     row   = rstart + i;
2205     nz    = ii[i+1] - ii[i];
2206     cwork = jj;     jj += nz;
2207     vwork = aa;     aa += nz;
2208     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2209   }
2210 
2211   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2213   *newmat = M;
2214 
2215   /* save submatrix used in processor for next request */
2216   if (call ==  MAT_INITIAL_MATRIX) {
2217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2219   }
2220 
2221   PetscFunctionReturn(0);
2222 }
2223