/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local column number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  (at a slightly higher hash-table lookup cost); without it each process
  stores an order-N integer array, which is not scalable but is fast to
  access.
*/
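
/*
  A minimal usage sketch (illustrative only; it mirrors the lookups done in
  MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below).  To translate a
  global column number gcol into a local column number of the off-diagonal
  block:

    #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;
    #else
      col = aij->colmap[gcol] - 1;
    #endif

  Entries are stored shifted up by one so that a zero (an empty table slot
  or a zeroed array entry) means "column not present in the off-diagonal
  part"; after the decrement, a negative col signals not found.
*/
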
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    /* binary search on the (sorted, shifted) column indices of this row */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    /* binary search on the (sorted, shifted) column indices of this row */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
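
/*
  Both macros above run the same insertion algorithm, once for the diagonal
  block (A) and once for the off-diagonal block (B): binary-search the
  sorted row for the column, update in place when found, otherwise shift
  the tail of the row up one slot and insert.  When a row is full the whole
  block is reallocated with CHUNKSIZE extra slots opened up at the end of
  that row.  A hedged sketch of just the row-pointer adjustment (the names
  ai, row, am play the same roles as in the A macro):

    new_nz = ai[am] + CHUNKSIZE;
    for (ii=0; ii<=row; ii++)    new_i[ii] = ai[ii];              -- earlier rows unchanged
    for (ii=row+1; ii<=am; ii++) new_i[ii] = ai[ii] + CHUNKSIZE;  -- later rows shifted up

  so only row `row` gains free space; every other row keeps its size.
*/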

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
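
/*
  A short usage note (a summary of the routine above, nothing new):
  MatSetValues_MPIAIJ() routes each entry (im[i],in[j]) three ways -- into
  aij->A when the column lies in [cstart,cend), into aij->B otherwise, and
  into mat->stash when the row is not locally owned.  The ownership test is
  just the interval check

    if (im[i] >= rstart && im[i] < rend) { row = im[i] - rstart; ... }

  Stashed entries are shipped to their owning processes later, inside
  MatAssemblyBegin/End.
*/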

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this process had no entries cached */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /*
     Determine if any process has disassembled; if so we must also
     disassemble ourselves, so that we may reassemble.  If the nonzero
     structure of the submatrix B cannot change then no process can have
     disassembled, and this step may be skipped.
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
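
/*
  Typical calling sequence seen from user code (a minimal sketch using only
  the public API implemented here):

    ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    ...
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Between the Begin and the End the stashed off-process entries are in
  flight, so independent computation can be overlapped there.
*/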

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count the number of contributions to each process */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processes of the number of messages and the max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith process
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and
     slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
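
/*
  A note on the communication pattern above (a summary of the code, not new
  behavior): each process bins the global row indices by owner, PetscMaxSum()
  tells every process how many messages to expect (nrecvs) and the largest
  message length (nmax), receives are posted with MPI_ANY_SOURCE, and each
  owner converts the received global indices to local ones (values[j] - base)
  before zeroing them through the sequential MatZeroRows().
*/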

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
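
/*
  In block terms the product above is y = A_d x + A_o x_ghost, where A_d
  (a->A) is the diagonal block, A_o (a->B) the off-diagonal block, and
  x_ghost (a->lvec) holds the scattered-in off-process entries of x.  The
  scatter is started before the local product so that communication
  overlaps the A_d x computation.
*/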

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive the remote parts; this assumes the values are not actually
     inserted into yy until the VecScatterEnd() below, which is true for
     this implementation but is perhaps not always true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat        A = Aij->A,B,Aoff = Aij->B,Boff;
  MatType    type;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;

  /* Compatible types */
  ierr = MatGetType(Bmat,&type);CHKERRQ(ierr);
  ierr = PetscStrcmp(type,MATMPIAIJ,f);CHKERRQ(ierr);
  if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");

  /* Easy test: compare the diagonal blocks */
  Bij  = (Mat_MPIAIJ*)Bmat->data; B = Bij->A;
  ierr = MatIsSymmetric(A,B,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(Bmat,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff);CHKERRQ(ierr);
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroy(Boff);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive the remote parts; this assumes the values are not actually
     inserted into zz until the VecScatterEnd() below, which is true for
     this implementation but is perhaps not always true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock
  aij->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
EXTERN int MatFactorInfo_Spooles(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get the largest number of rows any process has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process requires */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
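
/*
  File layout produced by the writer above (as implemented here, not a
  general guarantee): a four-int header [MAT_FILE_COOKIE, M, N, total nz],
  then all row lengths, then all global column indices, then all values,
  each section gathered to process 0 rank by rank and written with
  PetscBinaryWrite().  Within a row the off-diagonal columns below cstart
  come first, then the diagonal block, then the remaining off-diagonal
  columns, which keeps each row's indices sorted.
*/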

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first process */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processes that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
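
/*
  Each outer iteration above is a block Jacobi step with SOR as the local
  solver: the ghost values of the current iterate are gathered into
  mat->lvec, the right-hand side is corrected to bb1 = bb - B*x_ghost
  (implemented as a multadd with lvec scaled by -1), and lits local SOR
  sweeps are run on the diagonal block A against bb1.  Only the local part
  of the splitting is relaxed, which is why true parallel SOR is reported
  as unsupported.
*/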

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatNorm_MPIAIJ"
1301 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1302 {
1303   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1304   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1305   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1306   PetscReal    sum = 0.0;
1307   PetscScalar  *v;
1308 
1309   PetscFunctionBegin;
1310   if (aij->size == 1) {
1311     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1312   } else {
1313     if (type == NORM_FROBENIUS) {
1314       v = amat->a;
1315       for (i=0; i<amat->nz; i++) {
1316 #if defined(PETSC_USE_COMPLEX)
1317         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1318 #else
1319         sum += (*v)*(*v); v++;
1320 #endif
1321       }
1322       v = bmat->a;
1323       for (i=0; i<bmat->nz; i++) {
1324 #if defined(PETSC_USE_COMPLEX)
1325         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1331       *norm = sqrt(*norm);
1332     } else if (type == NORM_1) { /* max column norm */
1333       PetscReal *tmp,*tmp2;
1334       int    *jj,*garray = aij->garray;
1335       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1336       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1337       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1338       *norm = 0.0;
1339       v = amat->a; jj = amat->j;
1340       for (j=0; j<amat->nz; j++) {
1341         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1342       }
1343       v = bmat->a; jj = bmat->j;
1344       for (j=0; j<bmat->nz; j++) {
1345         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1346       }
1347       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1348       for (j=0; j<mat->N; j++) {
1349         if (tmp2[j] > *norm) *norm = tmp2[j];
1350       }
1351       ierr = PetscFree(tmp);CHKERRQ(ierr);
1352       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1353     } else if (type == NORM_INFINITY) { /* max row norm */
1354       PetscReal ntemp = 0.0;
1355       for (j=0; j<aij->A->m; j++) {
1356         v = amat->a + amat->i[j] + shift;
1357         sum = 0.0;
1358         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1359           sum += PetscAbsScalar(*v); v++;
1360         }
1361         v = bmat->a + bmat->i[j] + shift;
1362         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1363           sum += PetscAbsScalar(*v); v++;
1364         }
1365         if (sum > ntemp) ntemp = sum;
1366       }
1367       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1368     } else {
1369       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1370     }
1371   }
1372   PetscFunctionReturn(0);
1373 }
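/*
   Illustrative sketch (not compiled): how the norms computed above are
   requested through the public MatNorm() interface; the helper name is
   arbitrary and A is assumed to be an assembled parallel matrix.
*/
#if 0
int MatNormUsage(Mat A,PetscReal *nrm1,PetscReal *nrmf,PetscReal *nrminf)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,nrm1);CHKERRQ(ierr);          /* largest column sum, reduced with MPI_SUM */
  ierr = MatNorm(A,NORM_FROBENIUS,nrmf);CHKERRQ(ierr);  /* sqrt of the global sum of squares */
  ierr = MatNorm(A,NORM_INFINITY,nrminf);CHKERRQ(ierr); /* largest row sum, reduced with MPI_MAX */
  PetscFunctionReturn(0);
}
#endif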
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatTranspose_MPIAIJ"
1377 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1378 {
1379   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1380   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1381   int          ierr,shift = Aloc->indexshift;
1382   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1383   Mat          B;
1384   PetscScalar  *array;
1385 
1386   PetscFunctionBegin;
1387   if (!matout && M != N) {
1388     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1389   }
1390 
1391   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1392 
1393   /* copy over the A part */
1394   Aloc = (Mat_SeqAIJ*)a->A->data;
1395   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1396   row = a->rstart;
1397   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1398   for (i=0; i<m; i++) {
1399     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1400     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1401   }
1402   aj = Aloc->j;
1403   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1404 
1405   /* copy over the B part */
1406   Aloc = (Mat_SeqAIJ*)a->B->data;
1407   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1408   row  = a->rstart;
1409   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1410   ct   = cols;
1411   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1412   for (i=0; i<m; i++) {
1413     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1414     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1415   }
1416   ierr = PetscFree(ct);CHKERRQ(ierr);
1417   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1418   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1419   if (matout) {
1420     *matout = B;
1421   } else {
1422     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1423   }
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1429 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1430 {
1431   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1432   Mat        a = aij->A,b = aij->B;
1433   int        ierr,s1,s2,s3;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1437   if (rr) {
1438     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1439     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1440     /* Overlap communication with computation. */
1441     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1442   }
1443   if (ll) {
1444     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1445     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1446     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1447   }
1448   /* scale the diagonal block */
1449   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1450 
1451   if (rr) {
1452     /* Do a scatter end and then right scale the off-diagonal block */
1453     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1454     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1455   }
1456 
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1463 int MatPrintHelp_MPIAIJ(Mat A)
1464 {
1465   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1466   int        ierr;
1467 
1468   PetscFunctionBegin;
1469   if (!a->rank) {
1470     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1477 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1478 {
1479   PetscFunctionBegin;
1480   *bs = 1;
1481   PetscFunctionReturn(0);
1482 }
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1485 int MatSetUnfactored_MPIAIJ(Mat A)
1486 {
1487   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1488   int        ierr;
1489 
1490   PetscFunctionBegin;
1491   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatEqual_MPIAIJ"
1497 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1498 {
1499   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1500   Mat        a,b,c,d;
1501   PetscTruth flg;
1502   int        ierr;
1503 
1504   PetscFunctionBegin;
1505   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1506   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1507   a = matA->A; b = matA->B;
1508   c = matB->A; d = matB->B;
1509 
1510   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1511   if (flg == PETSC_TRUE) {
1512     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1513   }
1514   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatCopy_MPIAIJ"
1520 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1521 {
1522   int        ierr;
1523   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1524   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1525   PetscTruth flg;
1526 
1527   PetscFunctionBegin;
1528   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1529   if (str != SAME_NONZERO_PATTERN || !flg) {
1530     /* because of the column compression in the off-processor part of the matrix a->B,
1531        the number of columns in a->B and b->B may be different, hence we cannot call
1532        MatCopy() directly on the two parts. If need be, we could provide a copy more
1533        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1534        then copying the submatrices */
1535     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1536   } else {
1537     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1538     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1545 int MatSetUpPreallocation_MPIAIJ(Mat A)
1546 {
1547   int        ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1555 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1556 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1557 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1558 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1559 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1560 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1561 #endif
1562 
1563 #include "petscblaslapack.h"
1564 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatAXPY_MPIAIJ"
1567 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1568 {
1569   int        ierr,one=1,i;
1570   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1571   Mat_SeqAIJ *x,*y;
1572 
1573   PetscFunctionBegin;
1574   if (str == SAME_NONZERO_PATTERN) {
1575     x = (Mat_SeqAIJ *)xx->A->data;
1576     y = (Mat_SeqAIJ *)yy->A->data;
1577     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1578     x = (Mat_SeqAIJ *)xx->B->data;
1579     y = (Mat_SeqAIJ *)yy->B->data;
1580     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1581   } else if (str == SUBSET_NONZERO_PATTERN) {
1582     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1583 
1584     x = (Mat_SeqAIJ *)xx->B->data;
1585     y = (Mat_SeqAIJ *)yy->B->data;
1586     if (y->xtoy && y->XtoY != xx->B) {
1587       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1588       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1589     }
1590     if (!y->xtoy) { /* get xtoy */
1591       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1592       y->XtoY = xx->B;
1593     }
1594     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1595   } else {
1596     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 /* -------------------------------------------------------------------*/
1602 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1603        MatGetRow_MPIAIJ,
1604        MatRestoreRow_MPIAIJ,
1605        MatMult_MPIAIJ,
1606        MatMultAdd_MPIAIJ,
1607        MatMultTranspose_MPIAIJ,
1608        MatMultTransposeAdd_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612        0,
1613        0,
1614        0,
1615        MatRelax_MPIAIJ,
1616        MatTranspose_MPIAIJ,
1617        MatGetInfo_MPIAIJ,
1618        MatEqual_MPIAIJ,
1619        MatGetDiagonal_MPIAIJ,
1620        MatDiagonalScale_MPIAIJ,
1621        MatNorm_MPIAIJ,
1622        MatAssemblyBegin_MPIAIJ,
1623        MatAssemblyEnd_MPIAIJ,
1624        0,
1625        MatSetOption_MPIAIJ,
1626        MatZeroEntries_MPIAIJ,
1627        MatZeroRows_MPIAIJ,
1628 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1629        MatLUFactorSymbolic_MPIAIJ_TFS,
1630 #else
1631        0,
1632 #endif
1633        0,
1634        0,
1635        0,
1636        MatSetUpPreallocation_MPIAIJ,
1637        0,
1638        0,
1639        0,
1640        0,
1641        MatDuplicate_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646        MatAXPY_MPIAIJ,
1647        MatGetSubMatrices_MPIAIJ,
1648        MatIncreaseOverlap_MPIAIJ,
1649        MatGetValues_MPIAIJ,
1650        MatCopy_MPIAIJ,
1651        MatPrintHelp_MPIAIJ,
1652        MatScale_MPIAIJ,
1653        0,
1654        0,
1655        0,
1656        MatGetBlockSize_MPIAIJ,
1657        0,
1658        0,
1659        0,
1660        0,
1661        MatFDColoringCreate_MPIAIJ,
1662        0,
1663        MatSetUnfactored_MPIAIJ,
1664        0,
1665        0,
1666        MatGetSubMatrix_MPIAIJ,
1667        MatDestroy_MPIAIJ,
1668        MatView_MPIAIJ,
1669        MatGetPetscMaps_Petsc,
1670        0,
1671        0,
1672        0,
1673        0,
1674        0,
1675        0,
1676        0,
1677        0,
1678        MatSetColoring_MPIAIJ,
1679        MatSetValuesAdic_MPIAIJ,
1680        MatSetValuesAdifor_MPIAIJ
1681 };
1682 
1683 /* ----------------------------------------------------------------------------------------*/
1684 
1685 EXTERN_C_BEGIN
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1688 int MatStoreValues_MPIAIJ(Mat mat)
1689 {
1690   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1691   int        ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1695   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 EXTERN_C_END
1699 
1700 EXTERN_C_BEGIN
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1703 int MatRetrieveValues_MPIAIJ(Mat mat)
1704 {
1705   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1706   int        ierr;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1710   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 EXTERN_C_END
1714 
1715 #include "petscpc.h"
1716 EXTERN_C_BEGIN
1717 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1718 EXTERN_C_END
1719 
1720 EXTERN_C_BEGIN
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatCreate_MPIAIJ"
1723 int MatCreate_MPIAIJ(Mat B)
1724 {
1725   Mat_MPIAIJ *b;
1726   int        ierr,i,size;
1727 
1728   PetscFunctionBegin;
1729   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1730 
1731   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1732   B->data         = (void*)b;
1733   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1734   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1735   B->factor       = 0;
1736   B->assembled    = PETSC_FALSE;
1737   B->mapping      = 0;
1738 
1739   B->insertmode      = NOT_SET_VALUES;
1740   b->size            = size;
1741   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1742 
1743   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1744   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1745 
1746   /* the information in the maps duplicates the information computed below; eventually
1747      we should remove the duplicated fields and rely on the maps alone */
1748   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1749   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1750 
1751   /* build local table of row and column ownerships */
1752   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1753   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1754   b->cowners = b->rowners + b->size + 2;
1755   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1756   b->rowners[0] = 0;
1757   for (i=2; i<=b->size; i++) {
1758     b->rowners[i] += b->rowners[i-1];
1759   }
1760   b->rstart = b->rowners[b->rank];
1761   b->rend   = b->rowners[b->rank+1];
1762   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1763   b->cowners[0] = 0;
1764   for (i=2; i<=b->size; i++) {
1765     b->cowners[i] += b->cowners[i-1];
1766   }
1767   b->cstart = b->cowners[b->rank];
1768   b->cend   = b->cowners[b->rank+1];
1769 
1770   /* build cache for off array entries formed */
1771   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1772   b->donotstash  = PETSC_FALSE;
1773   b->colmap      = 0;
1774   b->garray      = 0;
1775   b->roworiented = PETSC_TRUE;
1776 
1777   /* stuff used for matrix vector multiply */
1778   b->lvec      = PETSC_NULL;
1779   b->Mvctx     = PETSC_NULL;
1780 
1781   /* stuff for MatGetRow() */
1782   b->rowindices   = 0;
1783   b->rowvalues    = 0;
1784   b->getrowactive = PETSC_FALSE;
1785 
1786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1787                                      "MatStoreValues_MPIAIJ",
1788                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1790                                      "MatRetrieveValues_MPIAIJ",
1791                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1793                                      "MatGetDiagonalBlock_MPIAIJ",
1794                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1795   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1796                                      "MatIsSymmetric_MPIAIJ",
1797                                      MatIsSymmetric_MPIAIJ);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 EXTERN_C_END
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1804 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1805 {
1806   Mat        mat;
1807   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1808   int        ierr;
1809 
1810   PetscFunctionBegin;
1811   *newmat       = 0;
1812   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1813   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1814   a    = (Mat_MPIAIJ*)mat->data;
1815   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1816   mat->factor       = matin->factor;
1817   mat->assembled    = PETSC_TRUE;
1818   mat->insertmode   = NOT_SET_VALUES;
1819   mat->preallocated = PETSC_TRUE;
1820 
1821   a->rstart       = oldmat->rstart;
1822   a->rend         = oldmat->rend;
1823   a->cstart       = oldmat->cstart;
1824   a->cend         = oldmat->cend;
1825   a->size         = oldmat->size;
1826   a->rank         = oldmat->rank;
1827   a->donotstash   = oldmat->donotstash;
1828   a->roworiented  = oldmat->roworiented;
1829   a->rowindices   = 0;
1830   a->rowvalues    = 0;
1831   a->getrowactive = PETSC_FALSE;
1832 
1833   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1834   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1835   if (oldmat->colmap) {
1836 #if defined (PETSC_USE_CTABLE)
1837     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1838 #else
1839     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1840     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1841     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1842 #endif
1843   } else a->colmap = 0;
1844   if (oldmat->garray) {
1845     int len;
1846     len  = oldmat->B->n;
1847     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1848     PetscLogObjectMemory(mat,len*sizeof(int));
1849     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1850   } else a->garray = 0;
1851 
1852   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1853   PetscLogObjectParent(mat,a->lvec);
1854   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1855   PetscLogObjectParent(mat,a->Mvctx);
1856   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1857   PetscLogObjectParent(mat,a->A);
1858   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1859   PetscLogObjectParent(mat,a->B);
1860   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1861   *newmat = mat;
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #include "petscsys.h"
1866 
1867 EXTERN_C_BEGIN
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatLoad_MPIAIJ"
1870 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1871 {
1872   Mat          A;
1873   PetscScalar  *vals,*svals;
1874   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1875   MPI_Status   status;
1876   int          i,nz,ierr,j,rstart,rend,fd;
1877   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1878   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1879   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1880 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1881   PetscTruth   flag;
1882 #endif
1883 
1884   PetscFunctionBegin;
1885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1887   if (!rank) {
1888     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1889     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1890     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1891     if (header[3] < 0) {
1892       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1893     }
1894   }
1895 
1896   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1897   M = header[1]; N = header[2];
1898   /* determine ownership of all rows */
1899   m = M/size + ((M % size) > rank);
1900   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1901   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1902   rowners[0] = 0;
1903   for (i=2; i<=size; i++) {
1904     rowners[i] += rowners[i-1];
1905   }
1906   rstart = rowners[rank];
1907   rend   = rowners[rank+1];
1908 
1909   /* distribute row lengths to all processors */
1910   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1911   offlens = ourlens + (rend-rstart);
1912   if (!rank) {
1913     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1914     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1915     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1916     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1917     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1918     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1919   } else {
1920     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1921   }
1922 
1923   if (!rank) {
1924     /* calculate the number of nonzeros on each processor */
1925     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1926     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1927     for (i=0; i<size; i++) {
1928       for (j=rowners[i]; j< rowners[i+1]; j++) {
1929         procsnz[i] += rowlengths[j];
1930       }
1931     }
1932     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1933 
1934     /* determine max buffer needed and allocate it */
1935     maxnz = 0;
1936     for (i=0; i<size; i++) {
1937       maxnz = PetscMax(maxnz,procsnz[i]);
1938     }
1939     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1940 
1941     /* read in my part of the matrix column indices  */
1942     nz   = procsnz[0];
1943     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1945 
1946     /* read in everyone else's parts and ship them off */
1947     for (i=1; i<size; i++) {
1948       nz   = procsnz[i];
1949       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1950       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1951     }
1952     ierr = PetscFree(cols);CHKERRQ(ierr);
1953   } else {
1954     /* determine buffer space needed for message */
1955     nz = 0;
1956     for (i=0; i<m; i++) {
1957       nz += ourlens[i];
1958     }
1959     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1960 
1961     /* receive message of column indices */
1962     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1963     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1964     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1965   }
1966 
1967   /* determine column ownership if matrix is not square */
1968   if (N != M) {
1969     n      = N/size + ((N % size) > rank);
1970     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1971     cstart = cend - n;
1972   } else {
1973     cstart = rstart;
1974     cend   = rend;
1975     n      = cend - cstart;
1976   }
1977 
1978   /* loop over local rows, determining number of off diagonal entries */
1979   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1980   jj = 0;
1981   for (i=0; i<m; i++) {
1982     for (j=0; j<ourlens[i]; j++) {
1983       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1984       jj++;
1985     }
1986   }
1987 
1988   /* create our matrix */
1989   for (i=0; i<m; i++) {
1990     ourlens[i] -= offlens[i];
1991   }
1992   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1993   A = *newmat;
1994   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1995   for (i=0; i<m; i++) {
1996     ourlens[i] += offlens[i];
1997   }
1998 
1999   if (!rank) {
2000     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2001 
2002     /* read in my part of the matrix numerical values  */
2003     nz   = procsnz[0];
2004     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2005 
2006     /* insert into matrix */
2007     jj      = rstart;
2008     smycols = mycols;
2009     svals   = vals;
2010     for (i=0; i<m; i++) {
2011       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2012       smycols += ourlens[i];
2013       svals   += ourlens[i];
2014       jj++;
2015     }
2016 
2017     /* read in the other processors' parts and ship them out */
2018     for (i=1; i<size; i++) {
2019       nz   = procsnz[i];
2020       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2021       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2022     }
2023     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2024   } else {
2025     /* receive numeric values */
2026     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2027 
2028     /* receive message of values */
2029     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2030     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2031     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2032 
2033     /* insert into matrix */
2034     jj      = rstart;
2035     smycols = mycols;
2036     svals   = vals;
2037     for (i=0; i<m; i++) {
2038       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2039       smycols += ourlens[i];
2040       svals   += ourlens[i];
2041       jj++;
2042     }
2043   }
2044   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2045   ierr = PetscFree(vals);CHKERRQ(ierr);
2046   ierr = PetscFree(mycols);CHKERRQ(ierr);
2047   ierr = PetscFree(rowners);CHKERRQ(ierr);
2048 
2049   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2051 #if defined(PETSC_HAVE_SPOOLES)
2052   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2053   if (flag) {
2054     if (size == 1) {
2055       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2056     } else {
2057       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2058     }
2059   }
2060 #endif
2061 #if defined(PETSC_HAVE_SUPERLUDIST)
2062   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2063   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2064 #endif
2065   PetscFunctionReturn(0);
2066 }
2067 EXTERN_C_END
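/*
   Illustrative sketch (not compiled): loading a parallel AIJ matrix from a
   PETSc binary file; the helper name and the file name "matrix.dat" are
   placeholders for illustration.
*/
#if 0
int MatLoadUsage(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif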
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2071 /*
2072     Not great since it makes two copies of the submatrix: first a SeqAIJ
2073   locally, and then the end result by concatenating the local matrices.
2074   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2075 */
2076 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2077 {
2078   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2079   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2080   Mat          *local,M,Mreuse;
2081   PetscScalar  *vwork,*aa;
2082   MPI_Comm     comm = mat->comm;
2083   Mat_SeqAIJ   *aij;
2084 
2085 
2086   PetscFunctionBegin;
2087   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2088   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2089 
2090   if (call ==  MAT_REUSE_MATRIX) {
2091     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2092     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2093     local = &Mreuse;
2094     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2095   } else {
2096     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2097     Mreuse = *local;
2098     ierr   = PetscFree(local);CHKERRQ(ierr);
2099   }
2100 
2101   /*
2102       m - number of local rows
2103       n - number of columns (same on all processors)
2104       rstart - first row in new global matrix generated
2105   */
2106   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2107   if (call == MAT_INITIAL_MATRIX) {
2108     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2109     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2110     ii  = aij->i;
2111     jj  = aij->j;
2112 
2113     /*
2114         Determine the number of non-zeros in the diagonal and off-diagonal
2115         portions of the matrix in order to do correct preallocation
2116     */
2117 
2118     /* first get start and end of "diagonal" columns */
2119     if (csize == PETSC_DECIDE) {
2120       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2121       if (mglobal == n) { /* square matrix */
2122         nlocal = m;
2123       } else {
2124         nlocal = n/size + ((n % size) > rank);
2125       }
2126     } else {
2127       nlocal = csize;
2128     }
2129     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2130     rstart = rend - nlocal;
2131     if (rank == size - 1 && rend != n) {
2132       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2133     }
2134 
2135     /* next, compute all the lengths */
2136     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2137     olens = dlens + m;
2138     for (i=0; i<m; i++) {
2139       jend = ii[i+1] - ii[i];
2140       olen = 0;
2141       dlen = 0;
2142       for (j=0; j<jend; j++) {
2143         if (*jj < rstart || *jj >= rend) olen++;
2144         else dlen++;
2145         jj++;
2146       }
2147       olens[i] = olen;
2148       dlens[i] = dlen;
2149     }
2150     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2151     ierr = PetscFree(dlens);CHKERRQ(ierr);
2152   } else {
2153     int ml,nl;
2154 
2155     M = *newmat;
2156     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2157     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2158     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2159     /*
2160          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2161        rather than the slower MatSetValues().
2162     */
2163     M->was_assembled = PETSC_TRUE;
2164     M->assembled     = PETSC_FALSE;
2165   }
2166   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2167   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2168   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2169   ii  = aij->i;
2170   jj  = aij->j;
2171   aa  = aij->a;
2172   for (i=0; i<m; i++) {
2173     row   = rstart + i;
2174     nz    = ii[i+1] - ii[i];
2175     cwork = jj;     jj += nz;
2176     vwork = aa;     aa += nz;
2177     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2178   }
2179 
2180   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   *newmat = M;
2183 
2184   /* save the submatrix used in this processor for the next reuse request */
2185   if (call ==  MAT_INITIAL_MATRIX) {
2186     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2187     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2188   }
2189 
2190   PetscFunctionReturn(0);
2191 }
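/*
   Illustrative sketch (not compiled): the first call creates the submatrix; a
   later call with MAT_REUSE_MATRIX refills it, using the "SubMatrix" object
   composed above to avoid reallocating. The helper name is arbitrary; mat,
   isrow, and iscol are assumed to be set up by the caller.
*/
#if 0
int MatGetSubMatrixUsage(Mat mat,IS isrow,IS iscol,Mat *sub)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  /* ... the values of mat change, the nonzero pattern stays the same ... */
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif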
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2195 /*@C
2196    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2197    (the default parallel PETSc format).  For good matrix assembly performance
2198    the user should preallocate the matrix storage by setting the parameters
2199    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2200    performance can be increased by more than a factor of 50.
2201 
2202    Collective on MPI_Comm
2203 
2204    Input Parameters:
2205 +  A - the matrix
2206 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2207            (same value is used for all local rows)
2208 .  d_nnz - array containing the number of nonzeros in the various rows of the
2209            DIAGONAL portion of the local submatrix (possibly different for each row)
2210            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2211            The size of this array is equal to the number of local rows, i.e., 'm'.
2212            You must leave room for the diagonal entry even if it is zero.
2213 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2214            submatrix (same value is used for all local rows).
2215 -  o_nnz - array containing the number of nonzeros in the various rows of the
2216            OFF-DIAGONAL portion of the local submatrix (possibly different for
2217            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2218            structure. The size of this array is equal to the number
2219            of local rows, i.e., 'm'.
2220 
2221    The AIJ format (also called the Yale sparse matrix format or
2222    compressed row storage), is fully compatible with standard Fortran 77
2223    storage.  That is, the stored row and column indices can begin at
2224    either one (as in Fortran) or zero.  See the users manual for details.
2225 
2226    The user MUST specify either the local or global matrix dimensions
2227    (possibly both).
2228 
2229    The parallel matrix is partitioned such that the first m0 rows belong to
2230    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2231    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2232 
2233    The DIAGONAL portion of the local submatrix of a processor can be defined
2234    as the submatrix obtained by extracting the part corresponding to the
2235    rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2236    first row that belongs to the processor and r2 is the last row belonging
2237    to this processor. This is a square mxm matrix. The remaining portion
2238    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2239 
2240    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2241 
2242    By default, this format uses inodes (identical nodes) when possible.
2243    We search for consecutive rows with the same nonzero structure, thereby
2244    reusing matrix information to achieve increased efficiency.
2245 
2246    Options Database Keys:
2247 +  -mat_aij_no_inode  - Do not use inodes
2248 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2249 -  -mat_aij_oneindex - Internally use indexing starting at 1
2250         rather than 0.  Note that when calling MatSetValues(),
2251         the user still MUST index entries starting at 0!
2252 
2253    Example usage:
2254 
2255    Consider the following 8x8 matrix with 34 nonzero values that is
2256    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2257    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2258    as follows:
2259 
2260 .vb
2261             1  2  0  |  0  3  0  |  0  4
2262     Proc0   0  5  6  |  7  0  0  |  8  0
2263             9  0 10  | 11  0  0  | 12  0
2264     -------------------------------------
2265            13  0 14  | 15 16 17  |  0  0
2266     Proc1   0 18  0  | 19 20 21  |  0  0
2267             0  0  0  | 22 23  0  | 24  0
2268     -------------------------------------
2269     Proc2  25 26 27  |  0  0 28  | 29  0
2270            30  0  0  | 31 32 33  |  0 34
2271 .ve
2272 
2273    This can be represented as a collection of submatrices as:
2274 
2275 .vb
2276       A B C
2277       D E F
2278       G H I
2279 .ve
2280 
2281    Where the submatrices A,B,C are owned by proc0, D,E,F are
2282    owned by proc1, G,H,I are owned by proc2.
2283 
2284    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2285    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2286    The 'M','N' parameters are 8,8, and have the same values on all procs.
2287 
2288    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2289    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2290    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2291    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2292    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2293    matrix and [DF] as another SeqAIJ matrix.
2294 
2295    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2296    allocated for every row of the local DIAGONAL submatrix, and o_nz
2297    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2298    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2299    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2300    In this case, the values of d_nz,o_nz are:
2301 .vb
2302      proc0 : d_nz = 2, o_nz = 2
2303      proc1 : d_nz = 3, o_nz = 2
2304      proc2 : d_nz = 1, o_nz = 4
2305 .ve
2306    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2307    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2308    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2309    34 values.
2310 
2311    When the d_nnz, o_nnz parameters are specified, the storage is specified
2312    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2313    In the above case the values for d_nnz,o_nnz are:
2314 .vb
2315      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2316      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2317      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2318 .ve
2319    Here the space allocated is the sum of all the above values, i.e., 34, and
2320    hence the preallocation is perfect.
2321 
2322    Level: intermediate
2323 
2324 .keywords: matrix, aij, compressed row, sparse, parallel
2325 
2326 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2327 @*/
2328 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2329 {
2330   Mat_MPIAIJ   *b;
2331   int          ierr,i;
2332   PetscTruth   flg2;
2333 
2334   PetscFunctionBegin;
2335   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2336   if (!flg2) PetscFunctionReturn(0);
2337   B->preallocated = PETSC_TRUE;
2338   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2339   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2340   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2341   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2342   if (d_nnz) {
2343     for (i=0; i<B->m; i++) {
2344       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2345     }
2346   }
2347   if (o_nnz) {
2348     for (i=0; i<B->m; i++) {
2349       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2350     }
2351   }
2352   b = (Mat_MPIAIJ*)B->data;
2353 
2354   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2355   PetscLogObjectParent(B,b->A);
2356   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2357   PetscLogObjectParent(B,b->B);
2358 
2359   PetscFunctionReturn(0);
2360 }
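/*
   Illustrative sketch (not compiled): the typical create/set-type/preallocate
   sequence this routine is part of. The helper name is arbitrary; m and n are
   the local sizes, and the per-row counts 5 and 2 are illustrative assumptions.
*/
#if 0
int MatMPIAIJPreallocationUsage(MPI_Comm comm,int m,int n,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif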
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatCreateMPIAIJ"
2364 /*@C
2365    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2366    (the default parallel PETSc format).  For good matrix assembly performance
2367    the user should preallocate the matrix storage by setting the parameters
2368    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2369    performance can be increased by more than a factor of 50.
2370 
2371    Collective on MPI_Comm
2372 
2373    Input Parameters:
2374 +  comm - MPI communicator
2375 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2376            This value should be the same as the local size used in creating the
2377            y vector for the matrix-vector product y = Ax.
2378 .  n - This value should be the same as the local size used in creating the
2379        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2380        calculated if N is given) For square matrices n is almost always m.
2381 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2382 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2383 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2384            (same value is used for all local rows)
2385 .  d_nnz - array containing the number of nonzeros in the various rows of the
2386            DIAGONAL portion of the local submatrix (possibly different for each row)
2387            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2388            The size of this array is equal to the number of local rows, i.e., 'm'.
2389            You must leave room for the diagonal entry even if it is zero.
2390 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2391            submatrix (same value is used for all local rows).
2392 -  o_nnz - array containing the number of nonzeros in the various rows of the
2393            OFF-DIAGONAL portion of the local submatrix (possibly different for
2394            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2395            structure. The size of this array is equal to the number
2396            of local rows, i.e., 'm'.
2397 
2398    Output Parameter:
2399 .  A - the matrix
2400 
2401    Notes:
2402    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2403    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2404    storage requirements for this matrix.
2405 
2406    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2407    processor then it must be used on all processors that share the object for
2408    that argument.
2409 
2410    The AIJ format (also called the Yale sparse matrix format or
2411    compressed row storage), is fully compatible with standard Fortran 77
2412    storage.  That is, the stored row and column indices can begin at
2413    either one (as in Fortran) or zero.  See the users manual for details.
2414 
2415    The user MUST specify either the local or global matrix dimensions
2416    (possibly both).
2417 
2418    The parallel matrix is partitioned such that the first m0 rows belong to
2419    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2420    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2421 
2422    The DIAGONAL portion of the local submatrix of a processor can be defined
2423    as the submatrix obtained by extracting the part corresponding to the
2424    rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2425    first row that belongs to the processor and r2 is the last row belonging
2426    to this processor. This is a square mxm matrix. The remaining portion
2427    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2428 
2429    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2430 
2431    By default, this format uses inodes (identical nodes) when possible.
2432    We search for consecutive rows with the same nonzero structure, thereby
2433    reusing matrix information to achieve increased efficiency.
2434 
2435    Options Database Keys:
2436 +  -mat_aij_no_inode  - Do not use inodes
2437 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2438 -  -mat_aij_oneindex - Internally use indexing starting at 1
2439         rather than 0.  Note that when calling MatSetValues(),
2440         the user still MUST index entries starting at 0!
2441 
2442 
2443    Example usage:
2444 
2445    Consider the following 8x8 matrix with 34 nonzero values that is
2446    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2447    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2448    as follows:
2449 
2450 .vb
2451             1  2  0  |  0  3  0  |  0  4
2452     Proc0   0  5  6  |  7  0  0  |  8  0
2453             9  0 10  | 11  0  0  | 12  0
2454     -------------------------------------
2455            13  0 14  | 15 16 17  |  0  0
2456     Proc1   0 18  0  | 19 20 21  |  0  0
2457             0  0  0  | 22 23  0  | 24  0
2458     -------------------------------------
2459     Proc2  25 26 27  |  0  0 28  | 29  0
2460            30  0  0  | 31 32 33  |  0 34
2461 .ve
2462 
2463    This can be represented as a collection of submatrices as:
2464 
2465 .vb
2466       A B C
2467       D E F
2468       G H I
2469 .ve
2470 
2471    Where the submatrices A,B,C are owned by proc0, D,E,F are
2472    owned by proc1, G,H,I are owned by proc2.
2473 
2474    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2475    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2476    The 'M','N' parameters are 8,8, and have the same values on all procs.
2477 
2478    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2479    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2480    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2481    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2482    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2483    matrix and [DF] as another SeqAIJ matrix.
2484 
2485    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2486    allocated for every row of the local DIAGONAL submatrix, and o_nz
2487    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2488    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2489    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2490    In this case, the values of d_nz,o_nz are:
2491 .vb
2492      proc0 : d_nz = 2, o_nz = 2
2493      proc1 : d_nz = 3, o_nz = 2
2494      proc2 : d_nz = 1, o_nz = 4
2495 .ve
2496    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2497    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2498    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2499    34 values.
2500 
2501    When the d_nnz, o_nnz parameters are specified, the storage is specified
2502    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2503    In the above case the values for d_nnz,o_nnz are:
2504 .vb
2505      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2506      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2507      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2508 .ve
2509    Here the space allocated is the sum of all the above values, i.e., 34, and
2510    hence the preallocation is perfect.
2511 
2512    Level: intermediate
2513 
2514 .keywords: matrix, aij, compressed row, sparse, parallel
2515 
2516 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2517 @*/
2518 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2519 {
2520   int ierr,size;
2521 
2522   PetscFunctionBegin;
2523   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2525   if (size > 1) {
2526     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2527     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2528   } else {
2529     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2530     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2531   }
2532   PetscFunctionReturn(0);
2533 }
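/*
   Illustrative sketch (not compiled): the call proc1 would make for the 8x8
   example in the manual page above, with the per-row d_nnz/o_nnz counts
   listed there. The helper name is arbitrary.
*/
#if 0
int MatCreateMPIAIJUsage(Mat *A)
{
  int ierr,d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};

  PetscFunctionBegin;
  /* 3 local rows and columns of an 8x8 global matrix; d_nz/o_nz are ignored since d_nnz/o_nnz are given */
  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,A);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}
#endif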
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2537 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2538 {
2539   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2540   PetscFunctionBegin;
2541   *Ad     = a->A;
2542   *Ao     = a->B;
2543   *colmap = a->garray;
2544   PetscFunctionReturn(0);
2545 }
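/*
   Illustrative sketch (not compiled): the off-diagonal block Ao stores its
   columns compressed; garray maps a local column of Ao back to its global
   column. The helper name is arbitrary.
*/
#if 0
int MatMPIAIJGetSeqAIJUsage(Mat A)
{
  Mat Ad,Ao;
  int *garray,ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  /* the global column index of local column j of Ao is garray[j] */
  PetscFunctionReturn(0);
}
#endif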
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2549 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2550 {
2551   int        ierr,i;
2552   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2553 
2554   PetscFunctionBegin;
2555   if (coloring->ctype == IS_COLORING_LOCAL) {
2556     ISColoringValue *allcolors,*colors;
2557     ISColoring      ocoloring;
2558 
2559     /* set coloring for diagonal portion */
2560     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2561 
2562     /* set coloring for off-diagonal portion */
2563     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2564     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2565     for (i=0; i<a->B->n; i++) {
2566       colors[i] = allcolors[a->garray[i]];
2567     }
2568     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2569     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2570     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2571     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2572   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2573     ISColoringValue *colors;
2574     int             *larray;
2575     ISColoring      ocoloring;
2576 
2577     /* set coloring for diagonal portion */
2578     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2579     for (i=0; i<a->A->n; i++) {
2580       larray[i] = i + a->cstart;
2581     }
2582     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2583     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2584     for (i=0; i<a->A->n; i++) {
2585       colors[i] = coloring->colors[larray[i]];
2586     }
2587     ierr = PetscFree(larray);CHKERRQ(ierr);
2588     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2589     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2590     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2591 
2592     /* set coloring for off-diagonal portion */
2593     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2594     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2595     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2596     for (i=0; i<a->B->n; i++) {
2597       colors[i] = coloring->colors[larray[i]];
2598     }
2599     ierr = PetscFree(larray);CHKERRQ(ierr);
2600     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2601     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2602     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2603   } else {
2604     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2605   }
2606 
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 #undef __FUNCT__
2611 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2612 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2613 {
2614   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2615   int        ierr;
2616 
2617   PetscFunctionBegin;
2618   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2619   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2625 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2626 {
2627   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2628   int        ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2632   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "MatMerge"
2638 /*@C
2639       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2640                  matrices from each processor
2641 
2642     Collective on MPI_Comm
2643 
2644    Input Parameters:
2645 +    comm - the communicator the parallel matrix will live on
2646 -    inmat - the input sequential matrix (one per processor)
2647 
2648    Output Parameter:
2649 .    outmat - the parallel matrix generated
2650 
2651     Level: advanced
2652 
2653    Notes: The number of columns of EACH of the separate input matrices
2654       MUST be the same.
2655 
2656 @*/
2657 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2658 {
2659   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2660   PetscScalar *values;
2661   PetscMap    columnmap,rowmap;
2662 
2663   PetscFunctionBegin;
2664 
2665   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2666 
2667   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2668   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2669   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2670   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2671   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2672   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2673 
2674   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2675   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2676   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2677   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2678   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2679 
2680   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2681   for (i=0;i<m;i++) {
2682     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2683     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2684     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2685   }
2686   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2687   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2688 
2689   for (i=0;i<m;i++) {
2690     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2691     I    = i + rstart;
2692     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2693     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2694   }
2695   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2696   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2697   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2698 
2699   PetscFunctionReturn(0);
2700 }
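/*
   Illustrative sketch (not compiled): each process contributes a sequential
   strip of rows, all with the same global column count N; MatMerge() then
   concatenates the strips (and destroys the input matrix). The helper name is
   arbitrary; mlocal and N are assumed to be supplied by the caller.
*/
#if 0
int MatMergeUsage(MPI_Comm comm,int mlocal,int N,Mat *global)
{
  Mat local;
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&local);CHKERRQ(ierr);
  /* ... MatSetValues() on the local rows, MatAssemblyBegin()/MatAssemblyEnd() ... */
  ierr = MatMerge(comm,local,global);CHKERRQ(ierr); /* local is destroyed here */
  PetscFunctionReturn(0);
}
#endif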
2701 
2702 #undef __FUNCT__
2703 #define __FUNCT__ "MatFileSplit"
2704 int MatFileSplit(Mat A,char *outfile)
2705 {
2706   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2707   PetscViewer out;
2708   char        *name;
2709   Mat         B;
2710   PetscScalar *values;
2711 
2712   PetscFunctionBegin;
2713 
2714   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2715   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2716   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2717   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2718   for (i=0;i<m;i++) {
2719     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2720     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2721     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2722   }
2723   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725 
2726   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2727   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2728   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2729   sprintf(name,"%s.%d",outfile,rank);
2730   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2731   ierr = PetscFree(name);
2732   ierr = MatView(B,out);CHKERRQ(ierr);
2733   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2734   ierr = MatDestroy(B);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737