xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7cf1b8d3bee2bb5f125300895b691a95a99b6229)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash-table cost; without it, it is not scalable (each processor
19 has an order N integer array) but it is fast to access.
20 */
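/*
   An illustrative sketch (not part of the PETSc API; names local_col/global_col are
   made up here) of how the two maps relate, assuming PETSC_USE_CTABLE is not defined:

      local_col  = aij->colmap[global_col] - 1;    negative means the global column is
                                                    not present in the off-diagonal block
      global_col = aij->garray[local_col];         garray is the inverse map

   The +1/-1 offset lets a zeroed colmap entry signal "column absent".
*/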
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
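/*
   The two macros below inline insertion of a single (row,col,value) entry into the
   local "diagonal" block (A) and the "off-diagonal" block (B) respectively: a short
   binary search narrows the row, a linear scan finds or creates the slot, and when
   the row has no free space the i/j/a arrays are reallocated with CHUNKSIZE extra
   entries and the row pointers beyond the current row are shifted accordingly.
*/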
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
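/*
   A hedged usage sketch (indices and values are illustrative only): callers always
   pass global indices; locally owned entries are routed above to the diagonal block
   aij->A or the off-diagonal block aij->B, and off-process entries are stashed until
   MatAssemblyBegin/End():

      int         row = 5, col = 7;
      PetscScalar one = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/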
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358   PetscTruth  flag;
359 #endif
360 
361   PetscFunctionBegin;
362   if (!aij->donotstash) {
363     while (1) {
364       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
365       if (!flg) break;
366 
367       for (i=0; i<n;) {
368         /* Now identify the consecutive vals belonging to the same row */
369         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370         if (j < n) ncols = j-i;
371         else       ncols = n-i;
372         /* Now assemble all these values with a single function call */
373         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
374         i = j;
375       }
376     }
377     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
378   }
379 
380   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
381   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
382 
383   /* determine if any processor has disassembled; if so we must
384      also disassemble ourselves, in order that we may reassemble. */
385   /*
386      if the nonzero structure of submatrix B cannot change then we know that
387      no processor disassembled, so we can skip this step
388   */
389   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
390     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
391     if (mat->was_assembled && !other_disassembled) {
392       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
393     }
394   }
395 
396   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
398   }
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 #if defined(PETSC_HAVE_SUPERLUDIST)
407   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
408   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
409 #endif
410 
411 #if defined(PETSC_HAVE_SPOOLES)
412   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
413   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
414 #endif
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatZeroRows_MPIAIJ"
433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434 {
435   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
436   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437   int            *nprocs,j,idx,nsends,row;
438   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
440   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
441   MPI_Comm       comm = A->comm;
442   MPI_Request    *send_waits,*recv_waits;
443   MPI_Status     recv_status,*send_status;
444   IS             istmp;
445   PetscTruth     found;
446 
447   PetscFunctionBegin;
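  /*
     Overview of the communication below: each process determines the owner of every
     row index it was given, exchanges message counts and maximum lengths via
     PetscMaxSum(), sends the indices to their owning processes, collects the indices
     it receives into lrows, and finally zeros its own local rows, optionally
     inserting the diagonal value.
  */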
448   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
450 
451   /*  first count number of contributors to each processor */
452   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
453   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
454   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
455   for (i=0; i<N; i++) {
456     idx = rows[i];
457     found = PETSC_FALSE;
458     for (j=0; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461       }
462     }
463     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464   }
465   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
466 
467   /* inform other processors of number of messages and max length*/
468   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
469 
470   /* post receives:   */
471   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
472   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
473   for (i=0; i<nrecvs; i++) {
474     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
475   }
476 
477   /* do sends:
478       1) starts[i] gives the starting index in svalues for stuff going to
479          the ith processor
480   */
481   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
482   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
483   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
484   starts[0] = 0;
485   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486   for (i=0; i<N; i++) {
487     svalues[starts[owner[i]]++] = rows[i];
488   }
489   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
490 
491   starts[0] = 0;
492   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493   count = 0;
494   for (i=0; i<size; i++) {
495     if (nprocs[2*i+1]) {
496       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
497     }
498   }
499   ierr = PetscFree(starts);CHKERRQ(ierr);
500 
501   base = owners[rank];
502 
503   /*  wait on receives */
504   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
505   source = lens + nrecvs;
506   count  = nrecvs; slen = 0;
507   while (count) {
508     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
509     /* unpack receives into our local space */
510     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
511     source[imdex]  = recv_status.MPI_SOURCE;
512     lens[imdex]    = n;
513     slen          += n;
514     count--;
515   }
516   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
517 
518   /* move the data into the send scatter */
519   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
520   count = 0;
521   for (i=0; i<nrecvs; i++) {
522     values = rvalues + i*nmax;
523     for (j=0; j<lens[i]; j++) {
524       lrows[count++] = values[j] - base;
525     }
526   }
527   ierr = PetscFree(rvalues);CHKERRQ(ierr);
528   ierr = PetscFree(lens);CHKERRQ(ierr);
529   ierr = PetscFree(owner);CHKERRQ(ierr);
530   ierr = PetscFree(nprocs);CHKERRQ(ierr);
531 
532   /* actually zap the local rows */
533   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
534   PetscLogObjectParent(A,istmp);
535 
536   /*
537         Zero the required rows. If the "diagonal block" of the matrix
538      is square and the user wishes to set the diagonal we use separate
539      code so that MatSetValues() is not called for each diagonal entry, which
540      would allocate new memory, triggering many mallocs and slowing things down.
541 
542        Contributed by: Mathew Knepley
543   */
544   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
545   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
546   if (diag && (l->A->M == l->A->N)) {
547     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
548   } else if (diag) {
549     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
550     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553     }
554     for (i = 0; i < slen; i++) {
555       row  = lrows[i] + rstart;
556       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
557     }
558     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560   } else {
561     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
562   }
563   ierr = ISDestroy(istmp);CHKERRQ(ierr);
564   ierr = PetscFree(lrows);CHKERRQ(ierr);
565 
566   /* wait on sends */
567   if (nsends) {
568     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
569     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
570     ierr = PetscFree(send_status);CHKERRQ(ierr);
571   }
572   ierr = PetscFree(send_waits);CHKERRQ(ierr);
573   ierr = PetscFree(svalues);CHKERRQ(ierr);
574 
575   PetscFunctionReturn(0);
576 }
577 
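/*
   MatMult and the variants below compute y = A_d x_local + A_o x_ghost, where A_d is
   the local diagonal block (a->A) and A_o the off-diagonal block (a->B); the scatter
   that gathers the ghost values of x into a->lvec is overlapped with the multiply by
   the diagonal block.
*/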
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_MPIAIJ"
580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583   int        ierr,nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602   int        ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the next line, which is true for this implementation */
628   /* but may not always hold. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638   int        ierr;
639 
640   PetscFunctionBegin;
641   /* do nondiagonal part */
642   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
643   /* send it on its way */
644   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   /* do local part */
646   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
647   /* receive remote parts: note this assumes the values are not actually */
648   /* inserted in zz until the next line, which is true for this implementation */
649   /* but may not always hold. */
650   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   This only works correctly for square matrices where the local sub-block a->A is the
656    diagonal block.
657 */
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661 {
662   int        ierr;
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664 
665   PetscFunctionBegin;
666   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667   if (a->rstart != a->cstart || a->rend != a->cend) {
668     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669   }
670   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatScale_MPIAIJ"
676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
683   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatDestroy_MPIAIJ"
689 int MatDestroy_MPIAIJ(Mat mat)
690 {
691   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692   int        ierr;
693 
694   PetscFunctionBegin;
695 #if defined(PETSC_USE_LOG)
696   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697 #endif
698   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
699   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
702 #if defined (PETSC_USE_CTABLE)
703   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
704 #else
705   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
706 #endif
707   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
708   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
709   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
710   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
711   ierr = PetscFree(aij);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
717 
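/*
   Binary format written below (all of it by process 0): a 4 entry header
   {MAT_FILE_COOKIE, M, N, total nonzeros}, then the row lengths, the global column
   indices, and the numerical values, each gathered from the other processes one
   row block at a time.
*/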
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_Binary"
720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
724   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
725   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727   PetscScalar       *column_values;
728 
729   PetscFunctionBegin;
730   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
731   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
732   nz   = A->nz + B->nz;
733   if (rank == 0) {
734     header[0] = MAT_FILE_COOKIE;
735     header[1] = mat->M;
736     header[2] = mat->N;
737     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
738     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
739     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
740     /* get largest number of rows any processor has */
741     rlen = mat->m;
742     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
743     for (i=1; i<size; i++) {
744       rlen = PetscMax(rlen,range[i+1] - range[i]);
745     }
746   } else {
747     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     rlen = mat->m;
749   }
750 
751   /* load up the local row counts */
752   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
753   for (i=0; i<mat->m; i++) {
754     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755   }
756 
757   /* store the row lengths to the file */
758   if (rank == 0) {
759     MPI_Status status;
760     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
761     for (i=1; i<size; i++) {
762       rlen = range[i+1] - range[i];
763       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
764       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
765     }
766   } else {
767     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
768   }
769   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
770 
771   /* load up the local column indices */
772   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
773   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
774   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
775   cnt  = 0;
776   for (i=0; i<mat->m; i++) {
777     for (j=B->i[i]; j<B->i[i+1]; j++) {
778       if ( (col = garray[B->j[j]]) > cstart) break;
779       column_indices[cnt++] = col;
780     }
781     for (k=A->i[i]; k<A->i[i+1]; k++) {
782       column_indices[cnt++] = A->j[k] + cstart;
783     }
784     for (; j<B->i[i+1]; j++) {
785       column_indices[cnt++] = garray[B->j[j]];
786     }
787   }
788   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
789 
790   /* store the column indices to the file */
791   if (rank == 0) {
792     MPI_Status status;
793     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
796       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
797       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
803   }
804   ierr = PetscFree(column_indices);CHKERRQ(ierr);
805 
806   /* load up the local column values */
807   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
808   cnt  = 0;
809   for (i=0; i<mat->m; i++) {
810     for (j=B->i[i]; j<B->i[i+1]; j++) {
811       if ( garray[B->j[j]] > cstart) break;
812       column_values[cnt++] = B->a[j];
813     }
814     for (k=A->i[i]; k<A->i[i+1]; k++) {
815       column_values[cnt++] = A->a[k];
816     }
817     for (; j<B->i[i+1]; j++) {
818       column_values[cnt++] = B->a[j];
819     }
820   }
821   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
822 
823   /* store the column values to the file */
824   if (rank == 0) {
825     MPI_Status status;
826     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
827     for (i=1; i<size; i++) {
828       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
829       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
830       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
831       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
832     }
833   } else {
834     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
835     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
836   }
837   ierr = PetscFree(column_values);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
844 {
845   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
846   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
847   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
848   PetscTruth        isdraw,isascii,flg,isbinary;
849   PetscViewer       sviewer;
850   PetscViewerFormat format;
851 
852   PetscFunctionBegin;
853   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
856   if (isascii) {
857     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
858     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
859       MatInfo info;
860       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
861       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
862       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
863       if (flg) {
864         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
865 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
866       } else {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
868 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
869       }
870       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
871       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
872       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
873       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
874       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
875       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
876       PetscFunctionReturn(0);
877     } else if (format == PETSC_VIEWER_ASCII_INFO) {
878       PetscFunctionReturn(0);
879     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
881       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
882 #endif
883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
884       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
885 #endif
886 
887       PetscFunctionReturn(0);
888     }
889   } else if (isbinary) {
890     if (size == 1) {
891       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
892       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
893     } else {
894       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
895     }
896     PetscFunctionReturn(0);
897   } else if (isdraw) {
898     PetscDraw  draw;
899     PetscTruth isnull;
900     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
901     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
902   }
903 
904   if (size == 1) {
905     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
906     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
907   } else {
908     /* assemble the entire matrix onto first processor. */
909     Mat         A;
910     Mat_SeqAIJ *Aloc;
911     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
912     PetscScalar *a;
913 
914     if (!rank) {
915       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
916     } else {
917       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
918     }
919     PetscLogObjectParent(mat,A);
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqAIJ*)aij->A->data;
923     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     row = aij->rstart;
925     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
926     for (i=0; i<m; i++) {
927       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
928       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
929     }
930     aj = Aloc->j;
931     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
932 
933     /* copy over the B part */
934     Aloc = (Mat_SeqAIJ*)aij->B->data;
935     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
936     row  = aij->rstart;
937     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
938     ct   = cols;
939     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
940     for (i=0; i<m; i++) {
941       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
942       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
943     }
944     ierr = PetscFree(ct);CHKERRQ(ierr);
945     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
946     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
947     /*
948        Everyone has to participate in drawing the matrix since the graphics waits are
949        synchronized across all processors that share the PetscDraw object
950     */
951     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
952     if (!rank) {
953       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
954       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
955     }
956     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
957     ierr = MatDestroy(A);CHKERRQ(ierr);
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatView_MPIAIJ"
964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965 {
966   int        ierr;
967   PetscTruth isascii,isdraw,issocket,isbinary;
968 
969   PetscFunctionBegin;
970   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
971   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
974   if (isascii || isdraw || isbinary || issocket) {
975     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
976   } else {
977     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 
983 
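/*
   The "relaxation" below is processor-local (block Jacobi) SOR: each outer iteration
   scatters the current x into mat->lvec, forms bb1 = bb - B*x_ghost using the
   off-diagonal block, and then applies the requested SOR sweep of the diagonal block
   to bb1; true parallel SOR across processes is not supported.
*/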
984 #undef __FUNCT__
985 #define __FUNCT__ "MatRelax_MPIAIJ"
986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
987 {
988   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
989   int          ierr;
990   Vec          bb1;
991   PetscScalar  mone=-1.0;
992 
993   PetscFunctionBegin;
994   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
995 
996   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
997 
998   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
999     if (flag & SOR_ZERO_INITIAL_GUESS) {
1000       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1001       its--;
1002     }
1003 
1004     while (its--) {
1005       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1006       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1007 
1008       /* update rhs: bb1 = bb - B*x */
1009       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1010       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1011 
1012       /* local sweep */
1013       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1014       CHKERRQ(ierr);
1015     }
1016   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1017     if (flag & SOR_ZERO_INITIAL_GUESS) {
1018       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1019       its--;
1020     }
1021     while (its--) {
1022       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1023       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1024 
1025       /* update rhs: bb1 = bb - B*x */
1026       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1027       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1028 
1029       /* local sweep */
1030       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1031       CHKERRQ(ierr);
1032     }
1033   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1034     if (flag & SOR_ZERO_INITIAL_GUESS) {
1035       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1036       its--;
1037     }
1038     while (its--) {
1039       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041 
1042       /* update rhs: bb1 = bb - B*x */
1043       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1044       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1045 
1046       /* local sweep */
1047       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1048       CHKERRQ(ierr);
1049     }
1050   } else {
1051     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1052   }
1053 
1054   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061 {
1062   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063   Mat        A = mat->A,B = mat->B;
1064   int        ierr;
1065   PetscReal  isend[5],irecv[5];
1066 
1067   PetscFunctionBegin;
1068   info->block_size     = 1.0;
1069   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1070   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071   isend[3] = info->memory;  isend[4] = info->mallocs;
1072   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1073   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074   isend[3] += info->memory;  isend[4] += info->mallocs;
1075   if (flag == MAT_LOCAL) {
1076     info->nz_used      = isend[0];
1077     info->nz_allocated = isend[1];
1078     info->nz_unneeded  = isend[2];
1079     info->memory       = isend[3];
1080     info->mallocs      = isend[4];
1081   } else if (flag == MAT_GLOBAL_MAX) {
1082     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1083     info->nz_used      = irecv[0];
1084     info->nz_allocated = irecv[1];
1085     info->nz_unneeded  = irecv[2];
1086     info->memory       = irecv[3];
1087     info->mallocs      = irecv[4];
1088   } else if (flag == MAT_GLOBAL_SUM) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   }
1096   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1097   info->fill_ratio_needed = 0;
1098   info->factor_mallocs    = 0;
1099   info->rows_global       = (double)matin->M;
1100   info->columns_global    = (double)matin->N;
1101   info->rows_local        = (double)matin->m;
1102   info->columns_local     = (double)matin->N;
1103 
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatSetOption_MPIAIJ"
1109 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110 {
1111   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112   int        ierr;
1113 
1114   PetscFunctionBegin;
1115   switch (op) {
1116   case MAT_NO_NEW_NONZERO_LOCATIONS:
1117   case MAT_YES_NEW_NONZERO_LOCATIONS:
1118   case MAT_COLUMNS_UNSORTED:
1119   case MAT_COLUMNS_SORTED:
1120   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121   case MAT_KEEP_ZEROED_ROWS:
1122   case MAT_NEW_NONZERO_LOCATION_ERR:
1123   case MAT_USE_INODES:
1124   case MAT_DO_NOT_USE_INODES:
1125   case MAT_IGNORE_ZERO_ENTRIES:
1126     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1127     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1128     break;
1129   case MAT_ROW_ORIENTED:
1130     a->roworiented = PETSC_TRUE;
1131     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1132     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1133     break;
1134   case MAT_ROWS_SORTED:
1135   case MAT_ROWS_UNSORTED:
1136   case MAT_YES_NEW_DIAGONALS:
1137     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138     break;
1139   case MAT_COLUMN_ORIENTED:
1140     a->roworiented = PETSC_FALSE;
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_IGNORE_OFF_PROC_ENTRIES:
1145     a->donotstash = PETSC_TRUE;
1146     break;
1147   case MAT_NO_NEW_DIAGONALS:
1148     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149   default:
1150     SETERRQ(PETSC_ERR_SUP,"unknown option");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatGetRow_MPIAIJ"
1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1158 {
1159   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1160   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1161   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1162   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1163   int          *cmap,*idx_p;
1164 
1165   PetscFunctionBegin;
1166   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1167   mat->getrowactive = PETSC_TRUE;
1168 
1169   if (!mat->rowvalues && (idx || v)) {
1170     /*
1171         allocate enough space to hold information from the longest row.
1172     */
1173     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1174     int     max = 1,tmp;
1175     for (i=0; i<matin->m; i++) {
1176       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1177       if (max < tmp) { max = tmp; }
1178     }
1179     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1180     mat->rowindices = (int*)(mat->rowvalues + max);
1181   }
1182 
1183   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1184   lrow = row - rstart;
1185 
1186   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187   if (!v)   {pvA = 0; pvB = 0;}
1188   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1190   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1191   nztot = nzA + nzB;
1192 
1193   cmap  = mat->garray;
1194   if (v  || idx) {
1195     if (nztot) {
1196       /* Sort by increasing column numbers, assuming A and B already sorted */
1197       int imark = -1;
1198       if (v) {
1199         *v = v_p = mat->rowvalues;
1200         for (i=0; i<nzB; i++) {
1201           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1202           else break;
1203         }
1204         imark = i;
1205         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1206         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1207       }
1208       if (idx) {
1209         *idx = idx_p = mat->rowindices;
1210         if (imark > -1) {
1211           for (i=0; i<imark; i++) {
1212             idx_p[i] = cmap[cworkB[i]];
1213           }
1214         } else {
1215           for (i=0; i<nzB; i++) {
1216             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1217             else break;
1218           }
1219           imark = i;
1220         }
1221         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1222         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1223       }
1224     } else {
1225       if (idx) *idx = 0;
1226       if (v)   *v   = 0;
1227     }
1228   }
1229   *nz = nztot;
1230   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1231   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1240 
1241   PetscFunctionBegin;
1242   if (aij->getrowactive == PETSC_FALSE) {
1243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244   }
1245   aij->getrowactive = PETSC_FALSE;
1246   PetscFunctionReturn(0);
1247 }
1248 
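/*
   Norms computed below (mathematical reference, not code):
      NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )
      NORM_1:          ||A||_1   = max_j sum_i |a_ij|    (max column sum)
      NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|    (max row sum)
   each is formed from the local A and B blocks and combined with an MPI reduction.
*/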
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatNorm_MPIAIJ"
1251 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1252 {
1253   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1254   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1255   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1256   PetscReal    sum = 0.0;
1257   PetscScalar  *v;
1258 
1259   PetscFunctionBegin;
1260   if (aij->size == 1) {
1261     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1262   } else {
1263     if (type == NORM_FROBENIUS) {
1264       v = amat->a;
1265       for (i=0; i<amat->nz; i++) {
1266 #if defined(PETSC_USE_COMPLEX)
1267         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1268 #else
1269         sum += (*v)*(*v); v++;
1270 #endif
1271       }
1272       v = bmat->a;
1273       for (i=0; i<bmat->nz; i++) {
1274 #if defined(PETSC_USE_COMPLEX)
1275         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1276 #else
1277         sum += (*v)*(*v); v++;
1278 #endif
1279       }
1280       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1281       *norm = sqrt(*norm);
1282     } else if (type == NORM_1) { /* max column norm */
1283       PetscReal *tmp,*tmp2;
1284       int    *jj,*garray = aij->garray;
1285       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1286       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1287       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1288       *norm = 0.0;
1289       v = amat->a; jj = amat->j;
1290       for (j=0; j<amat->nz; j++) {
1291         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1292       }
1293       v = bmat->a; jj = bmat->j;
1294       for (j=0; j<bmat->nz; j++) {
1295         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1296       }
1297       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1298       for (j=0; j<mat->N; j++) {
1299         if (tmp2[j] > *norm) *norm = tmp2[j];
1300       }
1301       ierr = PetscFree(tmp);CHKERRQ(ierr);
1302       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1303     } else if (type == NORM_INFINITY) { /* max row norm */
1304       PetscReal ntemp = 0.0;
1305       for (j=0; j<aij->A->m; j++) {
1306         v = amat->a + amat->i[j] + shift;
1307         sum = 0.0;
1308         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1309           sum += PetscAbsScalar(*v); v++;
1310         }
1311         v = bmat->a + bmat->i[j] + shift;
1312         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1313           sum += PetscAbsScalar(*v); v++;
1314         }
1315         if (sum > ntemp) ntemp = sum;
1316       }
1317       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1318     } else {
1319       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
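/*
   The transpose is formed explicitly: each local row of the diagonal and off-diagonal
   blocks is inserted as a column of a new parallel matrix B (off-diagonal column
   indices are first mapped back to global numbering through garray), and B is then
   assembled; in the in-place case the new header replaces the old one.
*/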
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatTranspose_MPIAIJ"
1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328 {
1329   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1330   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1331   int          ierr,shift = Aloc->indexshift;
1332   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1333   Mat          B;
1334   PetscScalar  *array;
1335 
1336   PetscFunctionBegin;
1337   if (!matout && M != N) {
1338     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1339   }
1340 
1341   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1342 
1343   /* copy over the A part */
1344   Aloc = (Mat_SeqAIJ*)a->A->data;
1345   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346   row = a->rstart;
1347   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1348   for (i=0; i<m; i++) {
1349     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1350     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1351   }
1352   aj = Aloc->j;
1353   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1354 
1355   /* copy over the B part */
1356   Aloc = (Mat_SeqAIJ*)a->B->data;
1357   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1358   row  = a->rstart;
1359   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1360   ct   = cols;
1361   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1362   for (i=0; i<m; i++) {
1363     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1364     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1365   }
1366   ierr = PetscFree(ct);CHKERRQ(ierr);
1367   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1368   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369   if (matout) {
1370     *matout = B;
1371   } else {
1372     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380 {
1381   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382   Mat        a = aij->A,b = aij->B;
1383   int        ierr,s1,s2,s3;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1392   }
1393   if (ll) {
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1397   }
1398   /* scale the diagonal block */
1399   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1400 
1401   if (rr) {
1402     /* Do a scatter end and then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1413 int MatPrintHelp_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1416   int        ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428 {
1429   PetscFunctionBegin;
1430   *bs = 1;
1431   PetscFunctionReturn(0);
1432 }
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1435 int MatSetUnfactored_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPIAIJ"
1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450   Mat        a,b,c,d;
1451   PetscTruth flg;
1452   int        ierr;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1456   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475   PetscTruth flg;
1476 
1477   PetscFunctionBegin;
1478   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1479   if (str != SAME_NONZERO_PATTERN || !flg) {
1480     /* Because of the column compression in the off-processor part of the matrix a->B,
1481        the number of columns in a->B and b->B may differ, hence we cannot call
1482        MatCopy() directly on the two parts. If need be, we could provide a more
1483        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1484        and then copying the submatrices. */
1485     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1486   } else {
1487     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1488     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1489   }
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1495 int MatSetUpPreallocation_MPIAIJ(Mat A)
1496 {
1497   int        ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1511 #endif
1512 
1513 #include "petscblaslapack.h"
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatAXPY_MPIAIJ"
1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518 {
1519   int        ierr,one = 1;     /* unit stride for the BLaxpy_() calls below */
1520   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521   Mat_SeqAIJ *x,*y;
1522 
1523   PetscFunctionBegin;
1524   if (str == SAME_NONZERO_PATTERN) {
1525     x  = (Mat_SeqAIJ *)xx->A->data;
1526     y  = (Mat_SeqAIJ *)yy->A->data;
1527     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528     x  = (Mat_SeqAIJ *)xx->B->data;
1529     y  = (Mat_SeqAIJ *)yy->B->data;
1530     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531   } else {
1532     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* -------------------------------------------------------------------*/
1538 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1539        MatGetRow_MPIAIJ,
1540        MatRestoreRow_MPIAIJ,
1541        MatMult_MPIAIJ,
1542        MatMultAdd_MPIAIJ,
1543        MatMultTranspose_MPIAIJ,
1544        MatMultTransposeAdd_MPIAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549        0,
1550        0,
1551        MatRelax_MPIAIJ,
1552        MatTranspose_MPIAIJ,
1553        MatGetInfo_MPIAIJ,
1554        MatEqual_MPIAIJ,
1555        MatGetDiagonal_MPIAIJ,
1556        MatDiagonalScale_MPIAIJ,
1557        MatNorm_MPIAIJ,
1558        MatAssemblyBegin_MPIAIJ,
1559        MatAssemblyEnd_MPIAIJ,
1560        0,
1561        MatSetOption_MPIAIJ,
1562        MatZeroEntries_MPIAIJ,
1563        MatZeroRows_MPIAIJ,
1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1565        MatLUFactorSymbolic_MPIAIJ_TFS,
1566 #else
1567        0,
1568 #endif
1569        0,
1570        0,
1571        0,
1572        MatSetUpPreallocation_MPIAIJ,
1573        0,
1574        0,
1575        0,
1576        0,
1577        MatDuplicate_MPIAIJ,
1578        0,
1579        0,
1580        0,
1581        0,
1582        MatAXPY_MPIAIJ,
1583        MatGetSubMatrices_MPIAIJ,
1584        MatIncreaseOverlap_MPIAIJ,
1585        MatGetValues_MPIAIJ,
1586        MatCopy_MPIAIJ,
1587        MatPrintHelp_MPIAIJ,
1588        MatScale_MPIAIJ,
1589        0,
1590        0,
1591        0,
1592        MatGetBlockSize_MPIAIJ,
1593        0,
1594        0,
1595        0,
1596        0,
1597        MatFDColoringCreate_MPIAIJ,
1598        0,
1599        MatSetUnfactored_MPIAIJ,
1600        0,
1601        0,
1602        MatGetSubMatrix_MPIAIJ,
1603        MatDestroy_MPIAIJ,
1604        MatView_MPIAIJ,
1605        MatGetPetscMaps_Petsc,
1606        0,
1607        0,
1608        0,
1609        0,
1610        0,
1611        0,
1612        0,
1613        0,
1614        MatSetColoring_MPIAIJ,
1615        MatSetValuesAdic_MPIAIJ,
1616        MatSetValuesAdifor_MPIAIJ
1617 };
1618 
1619 /* ----------------------------------------------------------------------------------------*/
1620 
1621 EXTERN_C_BEGIN
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1624 int MatStoreValues_MPIAIJ(Mat mat)
1625 {
1626   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627   int        ierr;
1628 
1629   PetscFunctionBegin;
1630   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1631   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1639 int MatRetrieveValues_MPIAIJ(Mat mat)
1640 {
1641   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1642   int        ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1646   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 #include "petscpc.h"
1652 EXTERN_C_BEGIN
1653 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1654 EXTERN_C_END
1655 
1656 EXTERN_C_BEGIN
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatCreate_MPIAIJ"
1659 int MatCreate_MPIAIJ(Mat B)
1660 {
1661   Mat_MPIAIJ *b;
1662   int        ierr,i,size;
1663 
1664   PetscFunctionBegin;
1665   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1666 
1667   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1668   B->data         = (void*)b;
1669   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1670   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1671   B->factor       = 0;
1672   B->assembled    = PETSC_FALSE;
1673   B->mapping      = 0;
1674 
1675   B->insertmode      = NOT_SET_VALUES;
1676   b->size            = size;
1677   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1678 
1679   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1680   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1681 
1682   /* the information in the maps duplicates the information computed below, eventually
1683      we should remove the duplicate information that is not contained in the maps */
1684   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1685   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1686 
1687   /* build local table of row and column ownerships */
1688   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1689   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1690   b->cowners = b->rowners + b->size + 2;
1691   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1692   b->rowners[0] = 0;
1693   for (i=2; i<=b->size; i++) {
1694     b->rowners[i] += b->rowners[i-1];
1695   }
1696   b->rstart = b->rowners[b->rank];
1697   b->rend   = b->rowners[b->rank+1];
1698   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1699   b->cowners[0] = 0;
1700   for (i=2; i<=b->size; i++) {
1701     b->cowners[i] += b->cowners[i-1];
1702   }
1703   b->cstart = b->cowners[b->rank];
1704   b->cend   = b->cowners[b->rank+1];
1705 
1706   /* build cache for off array entries formed */
1707   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1708   b->donotstash  = PETSC_FALSE;
1709   b->colmap      = 0;
1710   b->garray      = 0;
1711   b->roworiented = PETSC_TRUE;
1712 
1713   /* stuff used for matrix vector multiply */
1714   b->lvec      = PETSC_NULL;
1715   b->Mvctx     = PETSC_NULL;
1716 
1717   /* stuff for MatGetRow() */
1718   b->rowindices   = 0;
1719   b->rowvalues    = 0;
1720   b->getrowactive = PETSC_FALSE;
1721 
1722   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1723                                      "MatStoreValues_MPIAIJ",
1724                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1725   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1726                                      "MatRetrieveValues_MPIAIJ",
1727                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1728   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1729                                      "MatGetDiagonalBlock_MPIAIJ",
1730                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 EXTERN_C_END
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1738 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1739 {
1740   Mat        mat;
1741   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1742   int        ierr;
1743 
1744   PetscFunctionBegin;
1745   *newmat       = 0;
1746   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1747   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1748   a    = (Mat_MPIAIJ*)mat->data;
1749   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1750   mat->factor       = matin->factor;
1751   mat->assembled    = PETSC_TRUE;
1752   mat->insertmode   = NOT_SET_VALUES;
1753   mat->preallocated = PETSC_TRUE;
1754 
1755   a->rstart       = oldmat->rstart;
1756   a->rend         = oldmat->rend;
1757   a->cstart       = oldmat->cstart;
1758   a->cend         = oldmat->cend;
1759   a->size         = oldmat->size;
1760   a->rank         = oldmat->rank;
1761   a->donotstash   = oldmat->donotstash;
1762   a->roworiented  = oldmat->roworiented;
1763   a->rowindices   = 0;
1764   a->rowvalues    = 0;
1765   a->getrowactive = PETSC_FALSE;
1766 
1767   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1768   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1769   if (oldmat->colmap) {
1770 #if defined (PETSC_USE_CTABLE)
1771     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1772 #else
1773     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1774     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1775     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1776 #endif
1777   } else a->colmap = 0;
1778   if (oldmat->garray) {
1779     int len;
1780     len  = oldmat->B->n;
1781     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1782     PetscLogObjectMemory(mat,len*sizeof(int));
1783     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1784   } else a->garray = 0;
1785 
1786   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1787   PetscLogObjectParent(mat,a->lvec);
1788   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1789   PetscLogObjectParent(mat,a->Mvctx);
1790   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1791   PetscLogObjectParent(mat,a->A);
1792   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1793   PetscLogObjectParent(mat,a->B);
1794   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1795   *newmat = mat;
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #include "petscsys.h"
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatLoad_MPIAIJ"
1804 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1805 {
1806   Mat          A;
1807   PetscScalar  *vals,*svals;
1808   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1809   MPI_Status   status;
1810   int          i,nz,ierr,j,rstart,rend,fd;
1811   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1812   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1813   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1814 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1815   PetscTruth   flag;
1816 #endif
1817 
1818   PetscFunctionBegin;
1819   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1820   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1821   if (!rank) {
1822     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1823     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1824     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1825     if (header[3] < 0) {
1826       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1827     }
1828   }
1829 
1830   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1831   M = header[1]; N = header[2];
1832   /* determine ownership of all rows */
1833   m = M/size + ((M % size) > rank);
1834   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1835   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1836   rowners[0] = 0;
1837   for (i=2; i<=size; i++) {
1838     rowners[i] += rowners[i-1];
1839   }
1840   rstart = rowners[rank];
1841   rend   = rowners[rank+1];
1842 
1843   /* distribute row lengths to all processors */
1844   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1845   offlens = ourlens + (rend-rstart);
1846   if (!rank) {
1847     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1848     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1849     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1850     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1851     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1852     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1853   } else {
1854     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1855   }
1856 
1857   if (!rank) {
1858     /* calculate the number of nonzeros on each processor */
1859     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1860     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1861     for (i=0; i<size; i++) {
1862       for (j=rowners[i]; j< rowners[i+1]; j++) {
1863         procsnz[i] += rowlengths[j];
1864       }
1865     }
1866     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1867 
1868     /* determine max buffer needed and allocate it */
1869     maxnz = 0;
1870     for (i=0; i<size; i++) {
1871       maxnz = PetscMax(maxnz,procsnz[i]);
1872     }
1873     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1874 
1875     /* read in my part of the matrix column indices  */
1876     nz   = procsnz[0];
1877     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1878     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1879 
1880     /* read in everyone else's and ship them off */
1881     for (i=1; i<size; i++) {
1882       nz   = procsnz[i];
1883       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1884       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1885     }
1886     ierr = PetscFree(cols);CHKERRQ(ierr);
1887   } else {
1888     /* determine buffer space needed for message */
1889     nz = 0;
1890     for (i=0; i<m; i++) {
1891       nz += ourlens[i];
1892     }
1893     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1894 
1895     /* receive message of column indices */
1896     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1897     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1898     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1899   }
1900 
1901   /* determine column ownership if matrix is not square */
1902   if (N != M) {
1903     n      = N/size + ((N % size) > rank);
1904     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1905     cstart = cend - n;
1906   } else {
1907     cstart = rstart;
1908     cend   = rend;
1909     n      = cend - cstart;
1910   }
1911 
1912   /* loop over local rows, determining number of off diagonal entries */
1913   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1914   jj = 0;
1915   for (i=0; i<m; i++) {
1916     for (j=0; j<ourlens[i]; j++) {
1917       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1918       jj++;
1919     }
1920   }
1921 
1922   /* create our matrix */
1923   for (i=0; i<m; i++) {
1924     ourlens[i] -= offlens[i];
1925   }
1926   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1927   A = *newmat;
1928   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1929   for (i=0; i<m; i++) {
1930     ourlens[i] += offlens[i];
1931   }
1932 
1933   if (!rank) {
1934     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1935 
1936     /* read in my part of the matrix numerical values  */
1937     nz   = procsnz[0];
1938     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1939 
1940     /* insert into matrix */
1941     jj      = rstart;
1942     smycols = mycols;
1943     svals   = vals;
1944     for (i=0; i<m; i++) {
1945       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1946       smycols += ourlens[i];
1947       svals   += ourlens[i];
1948       jj++;
1949     }
1950 
1951     /* read in other processors and ship out */
1952     for (i=1; i<size; i++) {
1953       nz   = procsnz[i];
1954       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1955       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1956     }
1957     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1958   } else {
1959     /* receive numeric values */
1960     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1961 
1962     /* receive message of values */
1963     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1964     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1965     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1966 
1967     /* insert into matrix */
1968     jj      = rstart;
1969     smycols = mycols;
1970     svals   = vals;
1971     for (i=0; i<m; i++) {
1972       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1973       smycols += ourlens[i];
1974       svals   += ourlens[i];
1975       jj++;
1976     }
1977   }
1978   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1979   ierr = PetscFree(vals);CHKERRQ(ierr);
1980   ierr = PetscFree(mycols);CHKERRQ(ierr);
1981   ierr = PetscFree(rowners);CHKERRQ(ierr);
1982 
1983   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1985 #if defined(PETSC_HAVE_SPOOLES)
1986   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
1987   if (flag) { ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); }
1988 #endif
1989 #if defined(PETSC_HAVE_SUPERLUDIST)
1990   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
1991   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
1992 #endif
1993   PetscFunctionReturn(0);
1994 }
1995 EXTERN_C_END
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1999 /*
2000     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2001   on each process, and then the final result formed by concatenating the
2002   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2003 */
2004 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2005 {
2006   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2007   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2008   Mat          *local,M,Mreuse;
2009   PetscScalar  *vwork,*aa;
2010   MPI_Comm     comm = mat->comm;
2011   Mat_SeqAIJ   *aij;
2012 
2013 
2014   PetscFunctionBegin;
2015   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2016   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2017 
2018   if (call ==  MAT_REUSE_MATRIX) {
2019     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2020     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2021     local = &Mreuse;
2022     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2023   } else {
2024     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2025     Mreuse = *local;
2026     ierr   = PetscFree(local);CHKERRQ(ierr);
2027   }
2028 
2029   /*
2030       m - number of local rows
2031       n - number of columns (same on all processors)
2032       rstart - first row in new global matrix generated
2033   */
2034   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2035   if (call == MAT_INITIAL_MATRIX) {
2036     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2037     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2038     ii  = aij->i;
2039     jj  = aij->j;
2040 
2041     /*
2042         Determine the number of non-zeros in the diagonal and off-diagonal
2043         portions of the matrix in order to do correct preallocation
2044     */
2045 
2046     /* first get start and end of "diagonal" columns */
2047     if (csize == PETSC_DECIDE) {
2048       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2049       if (mglobal == n) { /* square matrix */
2050         nlocal = m;
2051       } else {
2052         nlocal = n/size + ((n % size) > rank);
2053       }
2054     } else {
2055       nlocal = csize;
2056     }
2057     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2058     rstart = rend - nlocal;
2059     if (rank == size - 1 && rend != n) {
2060       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2061     }
2062 
2063     /* next, compute all the lengths */
2064     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2065     olens = dlens + m;
2066     for (i=0; i<m; i++) {
2067       jend = ii[i+1] - ii[i];
2068       olen = 0;
2069       dlen = 0;
2070       for (j=0; j<jend; j++) {
2071         if (*jj < rstart || *jj >= rend) olen++;
2072         else dlen++;
2073         jj++;
2074       }
2075       olens[i] = olen;
2076       dlens[i] = dlen;
2077     }
2078     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2079     ierr = PetscFree(dlens);CHKERRQ(ierr);
2080   } else {
2081     int ml,nl;
2082 
2083     M = *newmat;
2084     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2085     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2086     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2087     /*
2088          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2089        rather than the slower MatSetValues().
2090     */
2091     M->was_assembled = PETSC_TRUE;
2092     M->assembled     = PETSC_FALSE;
2093   }
2094   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2095   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2096   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2097   ii  = aij->i;
2098   jj  = aij->j;
2099   aa  = aij->a;
2100   for (i=0; i<m; i++) {
2101     row   = rstart + i;
2102     nz    = ii[i+1] - ii[i];
2103     cwork = jj;     jj += nz;
2104     vwork = aa;     aa += nz;
2105     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2106   }
2107 
2108   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2109   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2110   *newmat = M;
2111 
2112   /* save submatrix used in processor for next request */
2113   if (call ==  MAT_INITIAL_MATRIX) {
2114     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2115     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2116   }
2117 
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2123 /*@C
2124    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in
2125    AIJ format (the default parallel PETSc format).  For good matrix assembly performance
2126    the user should preallocate the matrix storage by setting the parameters
2127    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2128    performance can be increased by more than a factor of 50.
2129 
2130    Collective on MPI_Comm
2131 
2132    Input Parameters:
2133 +  B - the matrix
2134 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2135            (same value is used for all local rows)
2136 .  d_nnz - array containing the number of nonzeros in the various rows of the
2137            DIAGONAL portion of the local submatrix (possibly different for each row)
2138            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2139            The size of this array is equal to the number of local rows, i.e., 'm'.
2140            You must leave room for the diagonal entry even if it is zero.
2141 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2142            submatrix (same value is used for all local rows).
2143 -  o_nnz - array containing the number of nonzeros in the various rows of the
2144            OFF-DIAGONAL portion of the local submatrix (possibly different for
2145            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2146            structure. The size of this array is equal to the number
2147            of local rows, i.e., 'm'.
2148 
2149    The AIJ format (also called the Yale sparse matrix format or
2150    compressed row storage) is fully compatible with standard Fortran 77
2151    storage.  That is, the stored row and column indices can begin at
2152    either one (as in Fortran) or zero.  See the users manual for details.
2153 
2154    The user MUST specify either the local or global matrix dimensions
2155    (possibly both).
2156 
2157    The parallel matrix is partitioned such that the first m0 rows belong to
2158    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2159    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2160 
2161    The DIAGONAL portion of the local submatrix of a processor can be defined
2162    as the submatrix obtained by extracting the part corresponding
2163    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2164    first row that belongs to the processor and r2 is the last row belonging
2165    to this processor. This is a square mxm matrix. The remaining portion
2166    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2167 
2168    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2169 
2170    By default, this format uses inodes (identical nodes) when possible.
2171    We search for consecutive rows with the same nonzero structure, thereby
2172    reusing matrix information to achieve increased efficiency.
2173 
2174    Options Database Keys:
2175 +  -mat_aij_no_inode  - Do not use inodes
2176 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2177 -  -mat_aij_oneindex - Internally use indexing starting at 1
2178         rather than 0.  Note that when calling MatSetValues(),
2179         the user still MUST index entries starting at 0!
2180 
2181    Example usage:
2182 
2183    Consider the following 8x8 matrix with 34 non-zero values that is
2184    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2185    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2186    as follows:
2187 
2188 .vb
2189             1  2  0  |  0  3  0  |  0  4
2190     Proc0   0  5  6  |  7  0  0  |  8  0
2191             9  0 10  | 11  0  0  | 12  0
2192     -------------------------------------
2193            13  0 14  | 15 16 17  |  0  0
2194     Proc1   0 18  0  | 19 20 21  |  0  0
2195             0  0  0  | 22 23  0  | 24  0
2196     -------------------------------------
2197     Proc2  25 26 27  |  0  0 28  | 29  0
2198            30  0  0  | 31 32 33  |  0 34
2199 .ve
2200 
2201    This can be represented as a collection of submatrices as:
2202 
2203 .vb
2204       A B C
2205       D E F
2206       G H I
2207 .ve
2208 
2209    Where the submatrices A,B,C are owned by proc0, D,E,F are
2210    owned by proc1, G,H,I are owned by proc2.
2211 
2212    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2213    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2214    The 'M','N' parameters are 8,8, and have the same values on all procs.
2215 
2216    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2217    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2218    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2219    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2220    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2221    matrix, and [DF] as another SeqAIJ matrix.
2222 
2223    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2224    allocated for every row of the local DIAGONAL submatrix, and o_nz
2225    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2226    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2227    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2228    In this case, the values of d_nz,o_nz are:
2229 .vb
2230      proc0 : d_nz = 2, o_nz = 2
2231      proc1 : d_nz = 3, o_nz = 2
2232      proc2 : d_nz = 1, o_nz = 4
2233 .ve
2234    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2235    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2236    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2237    34 values.
2238 
2239    When the d_nnz, o_nnz parameters are specified, the storage is specified
2240    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2241    In the above case the values for d_nnz,o_nnz are:
2242 .vb
2243      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2244      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2245      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2246 .ve
2247    Here the space allocated is the sum of all the above values, i.e., 34, and
2248    hence the preallocation is perfect.
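
   A minimal call sketch for proc1 in the example above (this assumes the matrix B has
   already been created with local sizes 3x3 and type MATMPIAIJ; the variable names are
   only illustrative):
.vb
     int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve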
2249 
2250    Level: intermediate
2251 
2252 .keywords: matrix, aij, compressed row, sparse, parallel
2253 
2254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2255 @*/
2256 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2257 {
2258   Mat_MPIAIJ   *b;
2259   int          ierr,i;
2260   PetscTruth   flg2;
2261 
2262   PetscFunctionBegin;
2263   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2264   if (!flg2) PetscFunctionReturn(0);
2265   B->preallocated = PETSC_TRUE;
2266   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2267   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2268   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2269   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2270   if (d_nnz) {
2271     for (i=0; i<B->m; i++) {
2272       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2273     }
2274   }
2275   if (o_nnz) {
2276     for (i=0; i<B->m; i++) {
2277       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2278     }
2279   }
2280   b = (Mat_MPIAIJ*)B->data;
2281 
2282   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2283   PetscLogObjectParent(B,b->A);
2284   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2285   PetscLogObjectParent(B,b->B);
2286 
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatCreateMPIAIJ"
2292 /*@C
2293    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2294    (the default parallel PETSc format).  For good matrix assembly performance
2295    the user should preallocate the matrix storage by setting the parameters
2296    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2297    performance can be increased by more than a factor of 50.
2298 
2299    Collective on MPI_Comm
2300 
2301    Input Parameters:
2302 +  comm - MPI communicator
2303 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2304            This value should be the same as the local size used in creating the
2305            y vector for the matrix-vector product y = Ax.
2306 .  n - This value should be the same as the local size used in creating the
2307        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2308        calculated if N is given) For square matrices n is almost always m.
2309 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2310 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2311 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2312            (same value is used for all local rows)
2313 .  d_nnz - array containing the number of nonzeros in the various rows of the
2314            DIAGONAL portion of the local submatrix (possibly different for each row)
2315            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2316            The size of this array is equal to the number of local rows, i.e., 'm'.
2317            You must leave room for the diagonal entry even if it is zero.
2318 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2319            submatrix (same value is used for all local rows).
2320 -  o_nnz - array containing the number of nonzeros in the various rows of the
2321            OFF-DIAGONAL portion of the local submatrix (possibly different for
2322            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2323            structure. The size of this array is equal to the number
2324            of local rows, i.e., 'm'.
2325 
2326    Output Parameter:
2327 .  A - the matrix
2328 
2329    Notes:
2330    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2331    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2332    storage requirements for this matrix.
2333 
2334    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2335    processor then it must be used on all processors that share the object for
2336    that argument.
2337 
2338    The AIJ format (also called the Yale sparse matrix format or
2339    compressed row storage) is fully compatible with standard Fortran 77
2340    storage.  That is, the stored row and column indices can begin at
2341    either one (as in Fortran) or zero.  See the users manual for details.
2342 
2343    The user MUST specify either the local or global matrix dimensions
2344    (possibly both).
2345 
2346    The parallel matrix is partitioned such that the first m0 rows belong to
2347    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2348    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2349 
2350    The DIAGONAL portion of the local submatrix of a processor can be defined
2351    as the submatrix obtained by extracting the part corresponding
2352    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2353    first row that belongs to the processor and r2 is the last row belonging
2354    to this processor. This is a square mxm matrix. The remaining portion
2355    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2356 
2357    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2358 
2359    By default, this format uses inodes (identical nodes) when possible.
2360    We search for consecutive rows with the same nonzero structure, thereby
2361    reusing matrix information to achieve increased efficiency.
2362 
2363    Options Database Keys:
2364 +  -mat_aij_no_inode  - Do not use inodes
2365 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2366 -  -mat_aij_oneindex - Internally use indexing starting at 1
2367         rather than 0.  Note that when calling MatSetValues(),
2368         the user still MUST index entries starting at 0!
2369 
2370 
2371    Example usage:
2372 
2373    Consider the following 8x8 matrix with 34 non-zero values that is
2374    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2375    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2376    as follows:
2377 
2378 .vb
2379             1  2  0  |  0  3  0  |  0  4
2380     Proc0   0  5  6  |  7  0  0  |  8  0
2381             9  0 10  | 11  0  0  | 12  0
2382     -------------------------------------
2383            13  0 14  | 15 16 17  |  0  0
2384     Proc1   0 18  0  | 19 20 21  |  0  0
2385             0  0  0  | 22 23  0  | 24  0
2386     -------------------------------------
2387     Proc2  25 26 27  |  0  0 28  | 29  0
2388            30  0  0  | 31 32 33  |  0 34
2389 .ve
2390 
2391    This can be represented as a collection of submatrices as:
2392 
2393 .vb
2394       A B C
2395       D E F
2396       G H I
2397 .ve
2398 
2399    Where the submatrices A,B,C are owned by proc0, D,E,F are
2400    owned by proc1, G,H,I are owned by proc2.
2401 
2402    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2403    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2404    The 'M','N' parameters are 8,8, and have the same values on all procs.
2405 
2406    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2407    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2408    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2409    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2410    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2411    matrix, and [DF] as another SeqAIJ matrix.
2412 
2413    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2414    allocated for every row of the local DIAGONAL submatrix, and o_nz
2415    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2416    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2417    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2418    In this case, the values of d_nz,o_nz are:
2419 .vb
2420      proc0 : d_nz = 2, o_nz = 2
2421      proc1 : d_nz = 3, o_nz = 2
2422      proc2 : d_nz = 1, o_nz = 4
2423 .ve
2424    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2425    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2426    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2427    34 values.
2428 
2429    When the d_nnz, o_nnz parameters are specified, the storage is specified
2430    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2431    In the above case the values for d_nnz,o_nnz are:
2432 .vb
2433      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2434      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2435      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2436 .ve
2437    Here the space allocated is the sum of all the above values, i.e., 34, and
2438    hence the preallocation is perfect.
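
   A minimal creation sketch for proc1 in the example above (the variable names are only
   illustrative; comm is assumed to be the communicator spanning the 3 processes):
.vb
     Mat A;
     int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve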
2439 
2440    Level: intermediate
2441 
2442 .keywords: matrix, aij, compressed row, sparse, parallel
2443 
2444 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2445 @*/
2446 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2447 {
2448   int ierr,size;
2449 
2450   PetscFunctionBegin;
2451   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2452   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2453   if (size > 1) {
2454     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2455     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2456   } else {
2457     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2458     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2459   }
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2465 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2466 {
2467   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2468   PetscFunctionBegin;
2469   *Ad     = a->A;
2470   *Ao     = a->B;
2471   *colmap = a->garray;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2477 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2478 {
2479   int        ierr,i;
2480   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2481 
2482   PetscFunctionBegin;
2483   if (coloring->ctype == IS_COLORING_LOCAL) {
2484     ISColoringValue *allcolors,*colors;
2485     ISColoring      ocoloring;
2486 
2487     /* set coloring for diagonal portion */
2488     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2489 
2490     /* set coloring for off-diagonal portion */
2491     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2492     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2493     for (i=0; i<a->B->n; i++) {
2494       colors[i] = allcolors[a->garray[i]];
2495     }
2496     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2497     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2498     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2499     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2500   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2501     ISColoringValue *colors;
2502     int             *larray;
2503     ISColoring      ocoloring;
2504 
2505     /* set coloring for diagonal portion */
2506     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2507     for (i=0; i<a->A->n; i++) {
2508       larray[i] = i + a->cstart;
2509     }
2510     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2511     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2512     for (i=0; i<a->A->n; i++) {
2513       colors[i] = coloring->colors[larray[i]];
2514     }
2515     ierr = PetscFree(larray);CHKERRQ(ierr);
2516     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2517     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2518     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2519 
2520     /* set coloring for off-diagonal portion */
2521     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2522     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2523     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2524     for (i=0; i<a->B->n; i++) {
2525       colors[i] = coloring->colors[larray[i]];
2526     }
2527     ierr = PetscFree(larray);CHKERRQ(ierr);
2528     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2529     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2530     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2531   } else {
2532     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2533   }
2534 
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2540 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2541 {
2542   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2543   int        ierr;
2544 
2545   PetscFunctionBegin;
2546   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2547   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2553 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2554 {
2555   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2556   int        ierr;
2557 
2558   PetscFunctionBegin;
2559   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2560   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatMerge"
2566 /*@C
2567       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2568                  matrices from each processor
2569 
2570     Collective on MPI_Comm
2571 
2572    Input Parameters:
2573 +    comm - the communicator the parallel matrix will live on
2574 -    inmat - the input sequential matrix on this processor
2575 
2576    Output Parameter:
2577 .    outmat - the parallel matrix generated
2578 
2579     Level: advanced
2580 
2581    Notes: The number of columns of the matrix on EACH of the separate processors
2582       MUST be the same. The input matrix is destroyed by this routine.
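
   A minimal usage sketch (Aseq is an illustrative, already-assembled SeqAIJ matrix
   holding this processor's rows; as noted above, the input matrix is destroyed):
.vb
     Mat Apar;
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,&Apar);CHKERRQ(ierr);
.ve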
2583 
2584 @*/
2585 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2586 {
2587   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2588   PetscScalar *values;
2589   PetscMap    columnmap,rowmap;
2590 
2591   PetscFunctionBegin;
2592 
2593   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2594 
2595   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2596   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2597   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2598   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2599   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2600   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2601 
2602   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2603   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2604   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2605   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2606   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2607 
2608   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2609   for (i=0;i<m;i++) {
2610     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2611     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2612     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2613   }
2614   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2615   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2616 
2617   for (i=0;i<m;i++) {
2618     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2619     I    = i + rstart;
2620     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2621     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2622   }
2623   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2624   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2625   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2626 
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatFileSplit"
2632 int MatFileSplit(Mat A,char *outfile)
2633 {
2634   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2635   PetscViewer out;
2636   char        *name;
2637   Mat         B;
2638   PetscScalar *values;
2639 
2640   PetscFunctionBegin;
2641 
2642   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2643   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2644   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2645   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2646   for (i=0;i<m;i++) {
2647     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2648     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2649     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2650   }
2651   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2653 
2654   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2655   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2656   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2657   sprintf(name,"%s.%d",outfile,rank);
2658   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2659   ierr = PetscFree(name);CHKERRQ(ierr);
2660   ierr = MatView(B,out);CHKERRQ(ierr);
2661   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2662   ierr = MatDestroy(B);CHKERRQ(ierr);
2663   PetscFunctionReturn(0);
2664 }
2665