xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0b498fdb2a73e86aac2706d824139bbc5f26bf18)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash-table cost; without it, it is not scalable (each processor
19 holds an order-N integer array) but access is fast.
20 */
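/*
   Illustrative example (made-up values): if the off-diagonal block B references the
   global columns garray[] = {3,7,42}, then colmap maps 3->1, 7->2, 42->3; the map is
   stored 1-based so that a lookup result of 0 means "this global column does not occur
   in the off-diagonal part on this process".
*/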
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
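/*
   The two macros below are hand-inlined variants of MatSetValues_SeqAIJ() for the local
   "diagonal" block A and "off-diagonal" block B.  Each locates the column within the row
   by a short binary search followed by a linear scan, then adds to or overwrites the
   existing entry.  If the row has no free slot left, the entire local storage is
   reallocated CHUNKSIZE entries larger, the old data is copied over, and the trailing
   entries of the row are shifted up to make room for the new value.
*/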
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
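/*
   MatSetValues for MPIAIJ: entries in locally owned rows are inserted directly, going
   into the diagonal block A when the global column lies in [cstart,cend) and into the
   off-diagonal block B otherwise (the global column is translated through colmap once
   the matrix has been assembled).  Entries for rows owned by other processes are placed
   in the stash and communicated during assembly, unless stashing has been turned off
   with MAT_IGNORE_OFF_PROC_ENTRIES.
*/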
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
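/*
   MatGetValues for MPIAIJ supports only rows owned by this process: values in the
   diagonal block are fetched from aij->A directly, off-diagonal columns are translated
   through colmap and fetched from aij->B, and a column that does not appear in the
   off-diagonal column map simply yields 0.0.
*/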
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col--;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode; the bitwise OR of the modes equals (ADD_VALUES|INSERT_VALUES) only if both modes were used */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358   PetscTruth  flag;
359 #endif
360 
361   PetscFunctionBegin;
362   if (!aij->donotstash) {
363     while (1) {
364       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
365       if (!flg) break;
366 
367       for (i=0; i<n;) {
368         /* Now identify the consecutive values belonging to the same row */
369         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370         if (j < n) ncols = j-i;
371         else       ncols = n-i;
372         /* Now assemble all these values with a single function call */
373         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
374         i = j;
375       }
376     }
377     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
378   }
379 
380   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
381   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
382 
383   /* determine if any processor has disassembled; if so we must
384      also disassemble ourselves, in order that we may reassemble. */
385   /*
386      if the nonzero structure of submatrix B cannot change then we know that
387      no processor disassembled, so we can skip this step
388   */
389   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
390     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
391     if (mat->was_assembled && !other_disassembled) {
392       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
393     }
394   }
395 
396   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
398   }
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 #if defined(PETSC_HAVE_SUPERLUDIST)
407   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
408   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
409 #endif
410 
411 #if defined(PETSC_HAVE_SPOOLES)
412   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
413   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
414 #endif
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatZeroRows_MPIAIJ"
433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434 {
435   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
436   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437   int            *nprocs,j,idx,nsends,row;
438   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
440   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
441   MPI_Comm       comm = A->comm;
442   MPI_Request    *send_waits,*recv_waits;
443   MPI_Status     recv_status,*send_status;
444   IS             istmp;
445   PetscTruth     found;
446 
447   PetscFunctionBegin;
448   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
450 
451   /*  first count the number of row indices destined for each processor; nprocs[2*j] holds the count for processor j and nprocs[2*j+1] is a 0/1 flag marking that a message goes there */
452   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
453   ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
454   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
455   for (i=0; i<N; i++) {
456     idx = rows[i];
457     found = PETSC_FALSE;
458     for (j=0; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461       }
462     }
463     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464   }
465   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
466 
467   /* inform other processors of number of messages and max length*/
468   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
469 
470   /* post receives:   */
471   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
472   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
473   for (i=0; i<nrecvs; i++) {
474     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
475   }
476 
477   /* do sends:
478       1) starts[i] gives the starting index in svalues for stuff going to
479          the ith processor
480   */
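  /*
     Illustrative example (made-up counts): with 3 processes and per-process counts
     {2,0,5} stored in nprocs[0],nprocs[2],nprocs[4], starts[] becomes {0,2,2}, so the
     rows owned by process 0 are packed into svalues[0..1] and those owned by process 2
     into svalues[2..6]; a message is then sent only to processes with a nonzero count.
  */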
481   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
482   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
483   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
484   starts[0] = 0;
485   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486   for (i=0; i<N; i++) {
487     svalues[starts[owner[i]]++] = rows[i];
488   }
489   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
490 
491   starts[0] = 0;
492   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493   count = 0;
494   for (i=0; i<size; i++) {
495     if (nprocs[2*i+1]) {
496       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
497     }
498   }
499   ierr = PetscFree(starts);CHKERRQ(ierr);
500 
501   base = owners[rank];
502 
503   /*  wait on receives */
504   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
505   source = lens + nrecvs;
506   count  = nrecvs; slen = 0;
507   while (count) {
508     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
509     /* unpack receives into our local space */
510     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
511     source[imdex]  = recv_status.MPI_SOURCE;
512     lens[imdex]    = n;
513     slen          += n;
514     count--;
515   }
516   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
517 
518   /* move the data into the send scatter */
519   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
520   count = 0;
521   for (i=0; i<nrecvs; i++) {
522     values = rvalues + i*nmax;
523     for (j=0; j<lens[i]; j++) {
524       lrows[count++] = values[j] - base;
525     }
526   }
527   ierr = PetscFree(rvalues);CHKERRQ(ierr);
528   ierr = PetscFree(lens);CHKERRQ(ierr);
529   ierr = PetscFree(owner);CHKERRQ(ierr);
530   ierr = PetscFree(nprocs);CHKERRQ(ierr);
531 
532   /* actually zap the local rows */
533   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
534   PetscLogObjectParent(A,istmp);
535 
536   /*
537         Zero the required rows. If the "diagonal block" of the matrix
538      is square and the user wishes to set the diagonal we use separate
539      code so that MatSetValues() is not called for each diagonal entry,
540      which would allocate new memory repeatedly and slow things down.
541 
542        Contributed by: Matthew Knepley
543   */
544   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
545   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
546   if (diag && (l->A->M == l->A->N)) {
547     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
548   } else if (diag) {
549     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
550     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553     }
554     for (i = 0; i < slen; i++) {
555       row  = lrows[i] + rstart;
556       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
557     }
558     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560   } else {
561     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
562   }
563   ierr = ISDestroy(istmp);CHKERRQ(ierr);
564   ierr = PetscFree(lrows);CHKERRQ(ierr);
565 
566   /* wait on sends */
567   if (nsends) {
568     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
569     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
570     ierr = PetscFree(send_status);CHKERRQ(ierr);
571   }
572   ierr = PetscFree(send_waits);CHKERRQ(ierr);
573   ierr = PetscFree(svalues);CHKERRQ(ierr);
574 
575   PetscFunctionReturn(0);
576 }
577 
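/*
   MatMult for MPIAIJ computes yy = A*xx + B*lvec, where A is the local diagonal block
   and B the off-diagonal block: the scatter of the required off-process entries of xx
   into a->lvec is started first, the multiply with the diagonal block overlaps that
   communication, and the off-diagonal contribution is added once the scatter completes.
*/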
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_MPIAIJ"
580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583   int        ierr,nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602   int        ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* added into yy until the next line, which is true for this implementation */
628   /* but may not always be true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638   int        ierr;
639 
640   PetscFunctionBegin;
641   /* do nondiagonal part */
642   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
643   /* send it on its way */
644   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   /* do local part */
646   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
647   /* receive remote parts: note this assumes the values are not actually */
648   /* added into zz until the next line, which is true for this implementation */
649   /* but may not always be true. */
650   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   This only works correctly for square matrices where the subblock A->A is the
656    diagonal block
657 */
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661 {
662   int        ierr;
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664 
665   PetscFunctionBegin;
666   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667   if (a->rstart != a->cstart || a->rend != a->cend) {
668     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669   }
670   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatScale_MPIAIJ"
676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
683   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatDestroy_MPIAIJ"
689 int MatDestroy_MPIAIJ(Mat mat)
690 {
691   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692   int        ierr;
693 
694   PetscFunctionBegin;
695 #if defined(PETSC_USE_LOG)
696   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697 #endif
698   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
699   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
702 #if defined (PETSC_USE_CTABLE)
703   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
704 #else
705   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
706 #endif
707   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
708   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
709   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
710   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
711   ierr = PetscFree(aij);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
720 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
724   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
725   PetscTruth        isdraw,isascii,flg;
726   PetscViewer       sviewer;
727   PetscViewerFormat format;
728 
729   PetscFunctionBegin;
730   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
732   if (isascii) {
733     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
734     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
735       MatInfo info;
736       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
737       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
738       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
739       if (flg) {
740         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
741 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
742       } else {
743         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
744 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
745       }
746       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
748       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
749       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
750       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
751       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
752       PetscFunctionReturn(0);
753     } else if (format == PETSC_VIEWER_ASCII_INFO) {
754       PetscFunctionReturn(0);
755     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
756 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
757       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
758 #endif
759 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
760       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
761 #endif
762 
763       PetscFunctionReturn(0);
764     }
765   } else if (isdraw) {
766     PetscDraw       draw;
767     PetscTruth isnull;
768     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
769     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
770   }
771 
772   if (size == 1) {
773     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
774     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
775   } else {
776     /* assemble the entire matrix onto first processor. */
777     Mat         A;
778     Mat_SeqAIJ *Aloc;
779     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
780     PetscScalar *a;
781 
782     if (!rank) {
783       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
784     } else {
785       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
786     }
787     PetscLogObjectParent(mat,A);
788 
789     /* copy over the A part */
790     Aloc = (Mat_SeqAIJ*)aij->A->data;
791     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
792     row = aij->rstart;
793     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
794     for (i=0; i<m; i++) {
795       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
796       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
797     }
798     aj = Aloc->j;
799     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
800 
801     /* copy over the B part */
802     Aloc = (Mat_SeqAIJ*)aij->B->data;
803     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
804     row  = aij->rstart;
805     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
806     ct   = cols;
807     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
808     for (i=0; i<m; i++) {
809       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
810       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
811     }
812     ierr = PetscFree(ct);CHKERRQ(ierr);
813     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
814     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
815     /*
816        Every process has to participate in drawing the matrix since the graphics
817        waits are synchronized across all processors that share the PetscDraw object
818     */
819     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
820     if (!rank) {
821       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
822       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
823     }
824     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
825     ierr = MatDestroy(A);CHKERRQ(ierr);
826   }
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatView_MPIAIJ"
832 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
833 {
834   int        ierr;
835   PetscTruth isascii,isdraw,issocket,isbinary;
836 
837   PetscFunctionBegin;
838   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
839   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
840   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
841   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
842   if (isascii || isdraw || isbinary || issocket) {
843     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
844   } else {
845     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatRelax_MPIAIJ"
854 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
855 {
856   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
857   int          ierr;
858   Vec          bb1;
859   PetscScalar  mone=-1.0;
860 
861   PetscFunctionBegin;
862   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
863 
864   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
865 
866   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
867     if (flag & SOR_ZERO_INITIAL_GUESS) {
868       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
869       its--;
870     }
871 
872     while (its--) {
873       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
874       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
875 
876       /* update rhs: bb1 = bb - B*x */
877       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
878       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
879 
880       /* local sweep */
881       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
882       CHKERRQ(ierr);
883     }
884   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
885     if (flag & SOR_ZERO_INITIAL_GUESS) {
886       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
887       its--;
888     }
889     while (its--) {
890       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
891       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
892 
893       /* update rhs: bb1 = bb - B*x */
894       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
895       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
896 
897       /* local sweep */
898       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
899       CHKERRQ(ierr);
900     }
901   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
902     if (flag & SOR_ZERO_INITIAL_GUESS) {
903       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
904       its--;
905     }
906     while (its--) {
907       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
908       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
909 
910       /* update rhs: bb1 = bb - B*x */
911       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
912       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
913 
914       /* local sweep */
915       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
916       CHKERRQ(ierr);
917     }
918   } else {
919     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
920   }
921 
922   ierr = VecDestroy(bb1);CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 
926 #undef __FUNCT__
927 #define __FUNCT__ "MatGetInfo_MPIAIJ"
928 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
929 {
930   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
931   Mat        A = mat->A,B = mat->B;
932   int        ierr;
933   PetscReal  isend[5],irecv[5];
934 
935   PetscFunctionBegin;
936   info->block_size     = 1.0;
937   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
938   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
939   isend[3] = info->memory;  isend[4] = info->mallocs;
940   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
941   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
942   isend[3] += info->memory;  isend[4] += info->mallocs;
943   if (flag == MAT_LOCAL) {
944     info->nz_used      = isend[0];
945     info->nz_allocated = isend[1];
946     info->nz_unneeded  = isend[2];
947     info->memory       = isend[3];
948     info->mallocs      = isend[4];
949   } else if (flag == MAT_GLOBAL_MAX) {
950     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
951     info->nz_used      = irecv[0];
952     info->nz_allocated = irecv[1];
953     info->nz_unneeded  = irecv[2];
954     info->memory       = irecv[3];
955     info->mallocs      = irecv[4];
956   } else if (flag == MAT_GLOBAL_SUM) {
957     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
958     info->nz_used      = irecv[0];
959     info->nz_allocated = irecv[1];
960     info->nz_unneeded  = irecv[2];
961     info->memory       = irecv[3];
962     info->mallocs      = irecv[4];
963   }
964   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
965   info->fill_ratio_needed = 0;
966   info->factor_mallocs    = 0;
967   info->rows_global       = (double)matin->M;
968   info->columns_global    = (double)matin->N;
969   info->rows_local        = (double)matin->m;
970   info->columns_local     = (double)matin->N;
971 
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSetOption_MPIAIJ"
977 int MatSetOption_MPIAIJ(Mat A,MatOption op)
978 {
979   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
980   int        ierr;
981 
982   PetscFunctionBegin;
983   switch (op) {
984   case MAT_NO_NEW_NONZERO_LOCATIONS:
985   case MAT_YES_NEW_NONZERO_LOCATIONS:
986   case MAT_COLUMNS_UNSORTED:
987   case MAT_COLUMNS_SORTED:
988   case MAT_NEW_NONZERO_ALLOCATION_ERR:
989   case MAT_KEEP_ZEROED_ROWS:
990   case MAT_NEW_NONZERO_LOCATION_ERR:
991   case MAT_USE_INODES:
992   case MAT_DO_NOT_USE_INODES:
993   case MAT_IGNORE_ZERO_ENTRIES:
994     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
995     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
996     break;
997   case MAT_ROW_ORIENTED:
998     a->roworiented = PETSC_TRUE;
999     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1000     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1001     break;
1002   case MAT_ROWS_SORTED:
1003   case MAT_ROWS_UNSORTED:
1004   case MAT_YES_NEW_DIAGONALS:
1005   case MAT_USE_SINGLE_PRECISION_SOLVES:
1006     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1007     break;
1008   case MAT_COLUMN_ORIENTED:
1009     a->roworiented = PETSC_FALSE;
1010     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1011     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1012     break;
1013   case MAT_IGNORE_OFF_PROC_ENTRIES:
1014     a->donotstash = PETSC_TRUE;
1015     break;
1016   case MAT_NO_NEW_DIAGONALS:
1017     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1018   default:
1019     SETERRQ(PETSC_ERR_SUP,"unknown option");
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatGetRow_MPIAIJ"
1026 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1027 {
1028   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1029   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1030   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1031   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1032   int          *cmap,*idx_p;
1033 
1034   PetscFunctionBegin;
1035   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1036   mat->getrowactive = PETSC_TRUE;
1037 
1038   if (!mat->rowvalues && (idx || v)) {
1039     /*
1040         allocate enough space to hold information from the longest row.
1041     */
1042     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1043     int     max = 1,tmp;
1044     for (i=0; i<matin->m; i++) {
1045       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1046       if (max < tmp) { max = tmp; }
1047     }
1048     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1049     mat->rowindices = (int*)(mat->rowvalues + max);
1050   }
1051 
1052   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1053   lrow = row - rstart;
1054 
1055   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1056   if (!v)   {pvA = 0; pvB = 0;}
1057   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1058   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1059   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1060   nztot = nzA + nzB;
1061 
1062   cmap  = mat->garray;
1063   if (v  || idx) {
1064     if (nztot) {
1065       /* Sort by increasing column numbers, assuming A and B already sorted */
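      /* The merged row is laid out as: entries of B whose global column is below cstart,
         then all entries of A (shifted to global numbering by cstart), then the remaining
         entries of B (global column >= cend); imark records where the first group ends. */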
1066       int imark = -1;
1067       if (v) {
1068         *v = v_p = mat->rowvalues;
1069         for (i=0; i<nzB; i++) {
1070           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1071           else break;
1072         }
1073         imark = i;
1074         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1075         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1076       }
1077       if (idx) {
1078         *idx = idx_p = mat->rowindices;
1079         if (imark > -1) {
1080           for (i=0; i<imark; i++) {
1081             idx_p[i] = cmap[cworkB[i]];
1082           }
1083         } else {
1084           for (i=0; i<nzB; i++) {
1085             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1086             else break;
1087           }
1088           imark = i;
1089         }
1090         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1091         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1092       }
1093     } else {
1094       if (idx) *idx = 0;
1095       if (v)   *v   = 0;
1096     }
1097   }
1098   *nz = nztot;
1099   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1100   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1106 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1107 {
1108   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1109 
1110   PetscFunctionBegin;
1111   if (aij->getrowactive == PETSC_FALSE) {
1112     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1113   }
1114   aij->getrowactive = PETSC_FALSE;
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatNorm_MPIAIJ"
1120 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1121 {
1122   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1123   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1124   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1125   PetscReal    sum = 0.0;
1126   PetscScalar  *v;
1127 
1128   PetscFunctionBegin;
1129   if (aij->size == 1) {
1130     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1131   } else {
1132     if (type == NORM_FROBENIUS) {
1133       v = amat->a;
1134       for (i=0; i<amat->nz; i++) {
1135 #if defined(PETSC_USE_COMPLEX)
1136         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1137 #else
1138         sum += (*v)*(*v); v++;
1139 #endif
1140       }
1141       v = bmat->a;
1142       for (i=0; i<bmat->nz; i++) {
1143 #if defined(PETSC_USE_COMPLEX)
1144         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1145 #else
1146         sum += (*v)*(*v); v++;
1147 #endif
1148       }
1149       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1150       *norm = sqrt(*norm);
1151     } else if (type == NORM_1) { /* max column norm */
1152       PetscReal *tmp,*tmp2;
1153       int    *jj,*garray = aij->garray;
1154       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1155       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1156       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1157       *norm = 0.0;
1158       v = amat->a; jj = amat->j;
1159       for (j=0; j<amat->nz; j++) {
1160         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1161       }
1162       v = bmat->a; jj = bmat->j;
1163       for (j=0; j<bmat->nz; j++) {
1164         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1165       }
1166       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1167       for (j=0; j<mat->N; j++) {
1168         if (tmp2[j] > *norm) *norm = tmp2[j];
1169       }
1170       ierr = PetscFree(tmp);CHKERRQ(ierr);
1171       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1172     } else if (type == NORM_INFINITY) { /* max row norm */
1173       PetscReal ntemp = 0.0;
1174       for (j=0; j<aij->A->m; j++) {
1175         v = amat->a + amat->i[j] + shift;
1176         sum = 0.0;
1177         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1178           sum += PetscAbsScalar(*v); v++;
1179         }
1180         v = bmat->a + bmat->i[j] + shift;
1181         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1182           sum += PetscAbsScalar(*v); v++;
1183         }
1184         if (sum > ntemp) ntemp = sum;
1185       }
1186       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1187     } else {
1188       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1189     }
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatTranspose_MPIAIJ"
1196 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1197 {
1198   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1199   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1200   int          ierr,shift = Aloc->indexshift;
1201   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1202   Mat          B;
1203   PetscScalar  *array;
1204 
1205   PetscFunctionBegin;
1206   if (!matout && M != N) {
1207     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1208   }
1209 
1210   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1211 
1212   /* copy over the A part */
1213   Aloc = (Mat_SeqAIJ*)a->A->data;
1214   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1215   row = a->rstart;
1216   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1217   for (i=0; i<m; i++) {
1218     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1219     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1220   }
1221   aj = Aloc->j;
1222   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1223 
1224   /* copy over the B part */
1225   Aloc = (Mat_SeqAIJ*)a->B->data;
1226   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1227   row  = a->rstart;
1228   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1229   ct   = cols;
1230   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1231   for (i=0; i<m; i++) {
1232     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1233     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1234   }
1235   ierr = PetscFree(ct);CHKERRQ(ierr);
1236   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1237   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1238   if (matout) {
1239     *matout = B;
1240   } else {
1241     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1248 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1249 {
1250   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1251   Mat        a = aij->A,b = aij->B;
1252   int        ierr,s1,s2,s3;
1253 
1254   PetscFunctionBegin;
1255   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1256   if (rr) {
1257     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1258     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1259     /* Overlap communication with computation. */
1260     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1261   }
1262   if (ll) {
1263     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1264     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1265     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1266   }
1267   /* scale  the diagonal block */
1268   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1269 
1270   if (rr) {
1271     /* Do a scatter end and then right scale the off-diagonal block */
1272     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1273     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1274   }
1275 
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1282 int MatPrintHelp_MPIAIJ(Mat A)
1283 {
1284   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1285   int        ierr;
1286 
1287   PetscFunctionBegin;
1288   if (!a->rank) {
1289     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1296 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1297 {
1298   PetscFunctionBegin;
1299   *bs = 1;
1300   PetscFunctionReturn(0);
1301 }
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1304 int MatSetUnfactored_MPIAIJ(Mat A)
1305 {
1306   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1307   int        ierr;
1308 
1309   PetscFunctionBegin;
1310   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatEqual_MPIAIJ"
1316 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1317 {
1318   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1319   Mat        a,b,c,d;
1320   PetscTruth flg;
1321   int        ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1325   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1326   a = matA->A; b = matA->B;
1327   c = matB->A; d = matB->B;
1328 
1329   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1330   if (flg == PETSC_TRUE) {
1331     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1332   }
1333   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatCopy_MPIAIJ"
1339 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1340 {
1341   int        ierr;
1342   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1343   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1344   PetscTruth flg;
1345 
1346   PetscFunctionBegin;
1347   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1348   if (str != SAME_NONZERO_PATTERN || !flg) {
1349     /* because of the column compression in the off-processor part of the matrix a->B,
1350        the number of columns in a->B and b->B may be different, hence we cannot call
1351        MatCopy() directly on the two parts. If need be, we could provide a more
1352        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1353        and then copying the submatrices */
1354     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1355   } else {
1356     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1357     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1364 int MatSetUpPreallocation_MPIAIJ(Mat A)
1365 {
1366   int        ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1374 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1375 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1376 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1377 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1378 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1379 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1380 #endif
1381 
1382 #include "petscblaslapack.h"
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatAXPY_MPIAIJ"
1386 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1387 {
1388   int        ierr,one = 1;
1389   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1390   Mat_SeqAIJ *x,*y;
1391 
1392   PetscFunctionBegin;
1393   if (str == SAME_NONZERO_PATTERN) {
1394     x  = (Mat_SeqAIJ *)xx->A->data;
1395     y  = (Mat_SeqAIJ *)yy->A->data;
1396     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1397     x  = (Mat_SeqAIJ *)xx->B->data;
1398     y  = (Mat_SeqAIJ *)yy->B->data;
1399     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1400   } else {
1401     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /* -------------------------------------------------------------------*/
1407 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1408        MatGetRow_MPIAIJ,
1409        MatRestoreRow_MPIAIJ,
1410        MatMult_MPIAIJ,
1411        MatMultAdd_MPIAIJ,
1412        MatMultTranspose_MPIAIJ,
1413        MatMultTransposeAdd_MPIAIJ,
1414        0,
1415        0,
1416        0,
1417        0,
1418        0,
1419        0,
1420        MatRelax_MPIAIJ,
1421        MatTranspose_MPIAIJ,
1422        MatGetInfo_MPIAIJ,
1423        MatEqual_MPIAIJ,
1424        MatGetDiagonal_MPIAIJ,
1425        MatDiagonalScale_MPIAIJ,
1426        MatNorm_MPIAIJ,
1427        MatAssemblyBegin_MPIAIJ,
1428        MatAssemblyEnd_MPIAIJ,
1429        0,
1430        MatSetOption_MPIAIJ,
1431        MatZeroEntries_MPIAIJ,
1432        MatZeroRows_MPIAIJ,
1433 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1434        MatLUFactorSymbolic_MPIAIJ_TFS,
1435 #else
1436        0,
1437 #endif
1438        0,
1439        0,
1440        0,
1441        MatSetUpPreallocation_MPIAIJ,
1442        0,
1443        0,
1444        0,
1445        0,
1446        MatDuplicate_MPIAIJ,
1447        0,
1448        0,
1449        0,
1450        0,
1451        MatAXPY_MPIAIJ,
1452        MatGetSubMatrices_MPIAIJ,
1453        MatIncreaseOverlap_MPIAIJ,
1454        MatGetValues_MPIAIJ,
1455        MatCopy_MPIAIJ,
1456        MatPrintHelp_MPIAIJ,
1457        MatScale_MPIAIJ,
1458        0,
1459        0,
1460        0,
1461        MatGetBlockSize_MPIAIJ,
1462        0,
1463        0,
1464        0,
1465        0,
1466        MatFDColoringCreate_MPIAIJ,
1467        0,
1468        MatSetUnfactored_MPIAIJ,
1469        0,
1470        0,
1471        MatGetSubMatrix_MPIAIJ,
1472        MatDestroy_MPIAIJ,
1473        MatView_MPIAIJ,
1474        MatGetPetscMaps_Petsc,
1475        0,
1476        0,
1477        0,
1478        0,
1479        0,
1480        0,
1481        0,
1482        0,
1483        MatSetColoring_MPIAIJ,
1484        MatSetValuesAdic_MPIAIJ,
1485        MatSetValuesAdifor_MPIAIJ
1486 };
1487 
1488 /* ----------------------------------------------------------------------------------------*/
1489 
1490 EXTERN_C_BEGIN
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1493 int MatStoreValues_MPIAIJ(Mat mat)
1494 {
1495   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1496   int        ierr;
1497 
1498   PetscFunctionBegin;
1499   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1500   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 EXTERN_C_END
1504 
1505 EXTERN_C_BEGIN
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1508 int MatRetrieveValues_MPIAIJ(Mat mat)
1509 {
1510   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1511   int        ierr;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1515   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 EXTERN_C_END
1519 
1520 #include "petscpc.h"
1521 EXTERN_C_BEGIN
1522 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1523 EXTERN_C_END
1524 
1525 EXTERN_C_BEGIN
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatCreate_MPIAIJ"
1528 int MatCreate_MPIAIJ(Mat B)
1529 {
1530   Mat_MPIAIJ *b;
1531   int        ierr,i,size;
1532 
1533   PetscFunctionBegin;
1534   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1535 
1536   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1537   B->data         = (void*)b;
1538   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1539   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1540   B->factor       = 0;
1541   B->assembled    = PETSC_FALSE;
1542   B->mapping      = 0;
1543 
1544   B->insertmode      = NOT_SET_VALUES;
1545   b->size            = size;
1546   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1547 
1548   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1549   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1550 
1551   /* the information in the maps duplicates the information computed below, eventually
1552      we should remove the duplicate information that is not contained in the maps */
1553   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1554   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1555 
1556   /* build local table of row and column ownerships */
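  /* after the gathers and running sums below, rowners[i] and cowners[i] hold the first
     global row/column owned by process i (rowners[size] = M, cowners[size] = N) */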
1557   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1558   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1559   b->cowners = b->rowners + b->size + 2;
1560   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1561   b->rowners[0] = 0;
1562   for (i=2; i<=b->size; i++) {
1563     b->rowners[i] += b->rowners[i-1];
1564   }
1565   b->rstart = b->rowners[b->rank];
1566   b->rend   = b->rowners[b->rank+1];
1567   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1568   b->cowners[0] = 0;
1569   for (i=2; i<=b->size; i++) {
1570     b->cowners[i] += b->cowners[i-1];
1571   }
1572   b->cstart = b->cowners[b->rank];
1573   b->cend   = b->cowners[b->rank+1];
1574 
1575   /* build cache (stash) for off-processor entries formed during assembly */
1576   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1577   b->donotstash  = PETSC_FALSE;
1578   b->colmap      = 0;
1579   b->garray      = 0;
1580   b->roworiented = PETSC_TRUE;
1581 
1582   /* stuff used for matrix vector multiply */
1583   b->lvec      = PETSC_NULL;
1584   b->Mvctx     = PETSC_NULL;
1585 
1586   /* stuff for MatGetRow() */
1587   b->rowindices   = 0;
1588   b->rowvalues    = 0;
1589   b->getrowactive = PETSC_FALSE;
1590 
1591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1592                                      "MatStoreValues_MPIAIJ",
1593                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1595                                      "MatRetrieveValues_MPIAIJ",
1596                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1598                                      "MatGetDiagonalBlock_MPIAIJ",
1599                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1600 
1601   PetscFunctionReturn(0);
1602 }
1603 EXTERN_C_END
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1607 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1608 {
1609   Mat        mat;
1610   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1611   int        ierr;
1612 
1613   PetscFunctionBegin;
1614   *newmat       = 0;
1615   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1616   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1617   a    = (Mat_MPIAIJ*)mat->data;
1618   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1619   mat->factor       = matin->factor;
1620   mat->assembled    = PETSC_TRUE;
1621   mat->insertmode   = NOT_SET_VALUES;
1622   mat->preallocated = PETSC_TRUE;
1623 
1624   a->rstart       = oldmat->rstart;
1625   a->rend         = oldmat->rend;
1626   a->cstart       = oldmat->cstart;
1627   a->cend         = oldmat->cend;
1628   a->size         = oldmat->size;
1629   a->rank         = oldmat->rank;
1630   a->donotstash   = oldmat->donotstash;
1631   a->roworiented  = oldmat->roworiented;
1632   a->rowindices   = 0;
1633   a->rowvalues    = 0;
1634   a->getrowactive = PETSC_FALSE;
1635 
1636   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1637   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1638   if (oldmat->colmap) {
1639 #if defined (PETSC_USE_CTABLE)
1640     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1641 #else
1642     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1643     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1644     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1645 #endif
1646   } else a->colmap = 0;
1647   if (oldmat->garray) {
1648     int len;
1649     len  = oldmat->B->n;
1650     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1651     PetscLogObjectMemory(mat,len*sizeof(int));
1652     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1653   } else a->garray = 0;
1654 
1655   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1656   PetscLogObjectParent(mat,a->lvec);
1657   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1658   PetscLogObjectParent(mat,a->Mvctx);
1659   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1660   PetscLogObjectParent(mat,a->A);
1661   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1662   PetscLogObjectParent(mat,a->B);
1663   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1664   *newmat = mat;
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #include "petscsys.h"
1669 
1670 EXTERN_C_BEGIN
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatLoad_MPIAIJ"
1673 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1674 {
1675   Mat          A;
1676   PetscScalar  *vals,*svals;
1677   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1678   MPI_Status   status;
1679   int          i,nz,ierr,j,rstart,rend,fd;
1680   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1681   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1682   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1683 
1684   PetscFunctionBegin;
1685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1686   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1687   if (!rank) {
1688     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1689     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1690     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1691     if (header[3] < 0) {
1692       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1693     }
1694   }
1695 
1696   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1697   M = header[1]; N = header[2];
1698   /* determine ownership of all rows */
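  /* each process gets M/size rows; the first M%size processes take one extra row */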
1699   m = M/size + ((M % size) > rank);
1700   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1701   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1702   rowners[0] = 0;
1703   for (i=2; i<=size; i++) {
1704     rowners[i] += rowners[i-1];
1705   }
1706   rstart = rowners[rank];
1707   rend   = rowners[rank+1];
1708 
1709   /* distribute row lengths to all processors */
1710   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1711   offlens = ourlens + (rend-rstart);
1712   if (!rank) {
1713     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1714     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1715     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1716     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1717     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1718     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1719   } else {
1720     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1721   }
1722 
1723   if (!rank) {
1724     /* calculate the number of nonzeros on each processor */
1725     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1726     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1727     for (i=0; i<size; i++) {
1728       for (j=rowners[i]; j< rowners[i+1]; j++) {
1729         procsnz[i] += rowlengths[j];
1730       }
1731     }
1732     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1733 
1734     /* determine max buffer needed and allocate it */
1735     maxnz = 0;
1736     for (i=0; i<size; i++) {
1737       maxnz = PetscMax(maxnz,procsnz[i]);
1738     }
1739     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1740 
1741     /* read in my part of the matrix column indices  */
1742     nz   = procsnz[0];
1743     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1744     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1745 
1746     /* read in everyone else's and ship them off */
1747     for (i=1; i<size; i++) {
1748       nz   = procsnz[i];
1749       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1750       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1751     }
1752     ierr = PetscFree(cols);CHKERRQ(ierr);
1753   } else {
1754     /* determine buffer space needed for message */
1755     nz = 0;
1756     for (i=0; i<m; i++) {
1757       nz += ourlens[i];
1758     }
1759     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1760 
1761     /* receive message of column indices*/
1762     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1763     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1764     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1765   }
1766 
1767   /* determine column ownership if matrix is not square */
1768   if (N != M) {
1769     n      = N/size + ((N % size) > rank);
1770     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1771     cstart = cend - n;
1772   } else {
1773     cstart = rstart;
1774     cend   = rend;
1775     n      = cend - cstart;
1776   }
1777 
1778   /* loop over local rows, determining the number of off-diagonal entries */
1779   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1780   jj = 0;
1781   for (i=0; i<m; i++) {
1782     for (j=0; j<ourlens[i]; j++) {
1783       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1784       jj++;
1785     }
1786   }
1787 
1788   /* create our matrix */
1789   for (i=0; i<m; i++) {
1790     ourlens[i] -= offlens[i];
1791   }
1792   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1793   A = *newmat;
1794   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1795   for (i=0; i<m; i++) {
1796     ourlens[i] += offlens[i];
1797   }
1798 
1799   if (!rank) {
1800     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1801 
1802     /* read in my part of the matrix numerical values  */
1803     nz   = procsnz[0];
1804     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1805 
1806     /* insert into matrix */
1807     jj      = rstart;
1808     smycols = mycols;
1809     svals   = vals;
1810     for (i=0; i<m; i++) {
1811       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1812       smycols += ourlens[i];
1813       svals   += ourlens[i];
1814       jj++;
1815     }
1816 
1817     /* read in other processors and ship out */
1818     for (i=1; i<size; i++) {
1819       nz   = procsnz[i];
1820       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1821       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1822     }
1823     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1824   } else {
1825     /* receive numeric values */
1826     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1827 
1828     /* receive message of values*/
1829     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1830     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1831     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1832 
1833     /* insert into matrix */
1834     jj      = rstart;
1835     smycols = mycols;
1836     svals   = vals;
1837     for (i=0; i<m; i++) {
1838       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1839       smycols += ourlens[i];
1840       svals   += ourlens[i];
1841       jj++;
1842     }
1843   }
1844   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1845   ierr = PetscFree(vals);CHKERRQ(ierr);
1846   ierr = PetscFree(mycols);CHKERRQ(ierr);
1847   ierr = PetscFree(rowners);CHKERRQ(ierr);
1848 
1849   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1850   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 EXTERN_C_END
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1857 /*
1858     Not great since it makes two copies of the submatrix: first a local SeqAIJ
1859   copy, and then the end result formed by concatenating the local matrices.
1860   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1861 */
1862 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1863 {
1864   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1865   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1866   Mat          *local,M,Mreuse;
1867   PetscScalar  *vwork,*aa;
1868   MPI_Comm     comm = mat->comm;
1869   Mat_SeqAIJ   *aij;
1870 
1871 
1872   PetscFunctionBegin;
1873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875 
1876   if (call ==  MAT_REUSE_MATRIX) {
1877     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1878     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1879     local = &Mreuse;
1880     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1881   } else {
1882     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1883     Mreuse = *local;
1884     ierr   = PetscFree(local);CHKERRQ(ierr);
1885   }
1886 
1887   /*
1888       m - number of local rows
1889       n - number of columns (same on all processors)
1890       rstart - first row in new global matrix generated
1891   */
1892   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1893   if (call == MAT_INITIAL_MATRIX) {
1894     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1895     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1896     ii  = aij->i;
1897     jj  = aij->j;
1898 
1899     /*
1900         Determine the number of non-zeros in the diagonal and off-diagonal
1901         portions of the matrix in order to do correct preallocation
1902     */
1903 
1904     /* first get start and end of "diagonal" columns */
1905     if (csize == PETSC_DECIDE) {
1906       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1907       if (mglobal == n) { /* square matrix */
1908         nlocal = m;
1909       } else {
1910         nlocal = n/size + ((n % size) > rank);
1911       }
1912     } else {
1913       nlocal = csize;
1914     }
1915     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1916     rstart = rend - nlocal;
1917     if (rank == size - 1 && rend != n) {
1918       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1919     }
1920 
1921     /* next, compute all the lengths */
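    /* for each local row of the extracted sequential matrix, count how many entries fall
       inside the new diagonal column block [rstart,rend) and how many fall outside it */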
1922     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
1923     olens = dlens + m;
1924     for (i=0; i<m; i++) {
1925       jend = ii[i+1] - ii[i];
1926       olen = 0;
1927       dlen = 0;
1928       for (j=0; j<jend; j++) {
1929         if (*jj < rstart || *jj >= rend) olen++;
1930         else dlen++;
1931         jj++;
1932       }
1933       olens[i] = olen;
1934       dlens[i] = dlen;
1935     }
1936     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
1937     ierr = PetscFree(dlens);CHKERRQ(ierr);
1938   } else {
1939     int ml,nl;
1940 
1941     M = *newmat;
1942     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1943     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1944     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1945     /*
1946          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1947        rather than the slower MatSetValues().
1948     */
1949     M->was_assembled = PETSC_TRUE;
1950     M->assembled     = PETSC_FALSE;
1951   }
1952   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1953   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1954   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1955   ii  = aij->i;
1956   jj  = aij->j;
1957   aa  = aij->a;
1958   for (i=0; i<m; i++) {
1959     row   = rstart + i;
1960     nz    = ii[i+1] - ii[i];
1961     cwork = jj;     jj += nz;
1962     vwork = aa;     aa += nz;
1963     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1964   }
1965 
1966   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   *newmat = M;
1969 
1970   /* save submatrix used in processor for next request */
1971   if (call ==  MAT_INITIAL_MATRIX) {
1972     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1973     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1974   }
1975 
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMPIAIJSetPreallocation"
1981 /*@C
1982    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1983    (the default parallel PETSc format).  For good matrix assembly performance
1984    the user should preallocate the matrix storage by setting the parameters
1985    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1986    performance can be increased by more than a factor of 50.
1987 
1988    Collective on MPI_Comm
1989 
1990    Input Parameters:
1991 +  A - the matrix
1992 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1993            (same value is used for all local rows)
1994 .  d_nnz - array containing the number of nonzeros in the various rows of the
1995            DIAGONAL portion of the local submatrix (possibly different for each row)
1996            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1997            The size of this array is equal to the number of local rows, i.e., 'm'.
1998            You must leave room for the diagonal entry even if it is zero.
1999 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2000            submatrix (same value is used for all local rows).
2001 -  o_nnz - array containing the number of nonzeros in the various rows of the
2002            OFF-DIAGONAL portion of the local submatrix (possibly different for
2003            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2004            structure. The size of this array is equal to the number
2005            of local rows, i.e., 'm'.
2006 
2007    The AIJ format (also called the Yale sparse matrix format or
2008    compressed row storage), is fully compatible with standard Fortran 77
2009    storage.  That is, the stored row and column indices can begin at
2010    either one (as in Fortran) or zero.  See the users manual for details.
2011 
2012    The user MUST specify either the local or global matrix dimensions
2013    (possibly both).
2014 
2015    The parallel matrix is partitioned such that the first m0 rows belong to
2016    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2017    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2018 
2019    The DIAGONAL portion of the local submatrix of a processor can be defined
2020    as the submatrix obtained by extracting the part corresponding
2021    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2022    first row that belongs to the processor, and r2 is the last row belonging
2023    to this processor. This is a square mxm matrix. The remaining portion
2024    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2025 
2026    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2027 
2028    By default, this format uses inodes (identical nodes) when possible.
2029    We search for consecutive rows with the same nonzero structure, thereby
2030    reusing matrix information to achieve increased efficiency.
2031 
2032    Options Database Keys:
2033 +  -mat_aij_no_inode  - Do not use inodes
2034 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2035 -  -mat_aij_oneindex - Internally use indexing starting at 1
2036         rather than 0.  Note that when calling MatSetValues(),
2037         the user still MUST index entries starting at 0!
2038 
2039    Example usage:
2040 
2041    Consider the following 8x8 matrix with 34 non-zero values, that is
2042    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2043    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2044    as follows:
2045 
2046 .vb
2047             1  2  0  |  0  3  0  |  0  4
2048     Proc0   0  5  6  |  7  0  0  |  8  0
2049             9  0 10  | 11  0  0  | 12  0
2050     -------------------------------------
2051            13  0 14  | 15 16 17  |  0  0
2052     Proc1   0 18  0  | 19 20 21  |  0  0
2053             0  0  0  | 22 23  0  | 24  0
2054     -------------------------------------
2055     Proc2  25 26 27  |  0  0 28  | 29  0
2056            30  0  0  | 31 32 33  |  0 34
2057 .ve
2058 
2059    This can be represented as a collection of submatrices as:
2060 
2061 .vb
2062       A B C
2063       D E F
2064       G H I
2065 .ve
2066 
2067    Where the submatrices A,B,C are owned by proc0, D,E,F are
2068    owned by proc1, G,H,I are owned by proc2.
2069 
2070    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2071    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2072    The 'M','N' parameters are 8,8, and have the same values on all procs.
2073 
2074    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2075    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2076    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2077    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2078    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2079    matrix, and [DF] as another SeqAIJ matrix.
2080 
2081    When d_nz, o_nz parameters are specified, d_nz storage elements are
2082    allocated for every row of the local diagonal submatrix, and o_nz
2083    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2084    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2085    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2086    In this case, the values of d_nz,o_nz are:
2087 .vb
2088      proc0 : d_nz = 2, o_nz = 2
2089      proc1 : d_nz = 3, o_nz = 2
2090      proc2 : d_nz = 1, o_nz = 4
2091 .ve
2092    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2093    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2094    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2095    34 values.
2096 
2097    When d_nnz, o_nnz parameters are specified, the storage is specified
2098    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2099    In the above case the values for d_nnz,o_nnz are:
2100 .vb
2101      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2102      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2103      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2104 .ve
2105    Here the space allocated is the sum of all the above values, i.e., 34, and
2106    hence the preallocation is exact.
2107 
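   A minimal call-sequence sketch for proc0 in the example above; the communicator
   comm, the ierr variable, and the error checking are illustrative assumptions and
   not part of the example:
.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
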
2108    Level: intermediate
2109 
2110 .keywords: matrix, aij, compressed row, sparse, parallel
2111 
2112 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2113 @*/
2114 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2115 {
2116   Mat_MPIAIJ   *b;
2117   int          ierr,i;
2118   PetscTruth   flg2;
2119 
2120   PetscFunctionBegin;
2121   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2122   if (!flg2) PetscFunctionReturn(0);
2123   B->preallocated = PETSC_TRUE;
2124   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2125   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2126   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2127   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2128   if (d_nnz) {
2129     for (i=0; i<B->m; i++) {
2130       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2131     }
2132   }
2133   if (o_nnz) {
2134     for (i=0; i<B->m; i++) {
2135       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2136     }
2137   }
2138   b = (Mat_MPIAIJ*)B->data;
2139 
2140   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2141   PetscLogObjectParent(B,b->A);
2142   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2143   PetscLogObjectParent(B,b->B);
2144 
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatCreateMPIAIJ"
2150 /*@C
2151    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2152    (the default parallel PETSc format).  For good matrix assembly performance
2153    the user should preallocate the matrix storage by setting the parameters
2154    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2155    performance can be increased by more than a factor of 50.
2156 
2157    Collective on MPI_Comm
2158 
2159    Input Parameters:
2160 +  comm - MPI communicator
2161 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2162            This value should be the same as the local size used in creating the
2163            y vector for the matrix-vector product y = Ax.
2164 .  n - This value should be the same as the local size used in creating the
2165        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2166        calculated if N is given) For square matrices n is almost always m.
2167 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2168 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2169 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2170            (same value is used for all local rows)
2171 .  d_nnz - array containing the number of nonzeros in the various rows of the
2172            DIAGONAL portion of the local submatrix (possibly different for each row)
2173            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2174            The size of this array is equal to the number of local rows, i.e., 'm'.
2175            You must leave room for the diagonal entry even if it is zero.
2176 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2177            submatrix (same value is used for all local rows).
2178 -  o_nnz - array containing the number of nonzeros in the various rows of the
2179            OFF-DIAGONAL portion of the local submatrix (possibly different for
2180            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2181            structure. The size of this array is equal to the number
2182            of local rows, i.e., 'm'.
2183 
2184    Output Parameter:
2185 .  A - the matrix
2186 
2187    Notes:
2188    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2189    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2190    storage requirements for this matrix.
2191 
2192    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2193    processor then it must be used on all processors that share the object for
2194    that argument.
2195 
2196    The AIJ format (also called the Yale sparse matrix format or
2197    compressed row storage), is fully compatible with standard Fortran 77
2198    storage.  That is, the stored row and column indices can begin at
2199    either one (as in Fortran) or zero.  See the users manual for details.
2200 
2201    The user MUST specify either the local or global matrix dimensions
2202    (possibly both).
2203 
2204    The parallel matrix is partitioned such that the first m0 rows belong to
2205    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2206    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2207 
2208    The DIAGONAL portion of the local submatrix of a processor can be defined
2209    as the submatrix obtained by extracting the part corresponding
2210    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2211    first row that belongs to the processor, and r2 is the last row belonging
2212    to this processor. This is a square mxm matrix. The remaining portion
2213    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2214 
2215    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2216 
2217    By default, this format uses inodes (identical nodes) when possible.
2218    We search for consecutive rows with the same nonzero structure, thereby
2219    reusing matrix information to achieve increased efficiency.
2220 
2221    Options Database Keys:
2222 +  -mat_aij_no_inode  - Do not use inodes
2223 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2224 -  -mat_aij_oneindex - Internally use indexing starting at 1
2225         rather than 0.  Note that when calling MatSetValues(),
2226         the user still MUST index entries starting at 0!
2227 
2228 
2229    Example usage:
2230 
2231    Consider the following 8x8 matrix with 34 non-zero values, that is
2232    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2233    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2234    as follows:
2235 
2236 .vb
2237             1  2  0  |  0  3  0  |  0  4
2238     Proc0   0  5  6  |  7  0  0  |  8  0
2239             9  0 10  | 11  0  0  | 12  0
2240     -------------------------------------
2241            13  0 14  | 15 16 17  |  0  0
2242     Proc1   0 18  0  | 19 20 21  |  0  0
2243             0  0  0  | 22 23  0  | 24  0
2244     -------------------------------------
2245     Proc2  25 26 27  |  0  0 28  | 29  0
2246            30  0  0  | 31 32 33  |  0 34
2247 .ve
2248 
2249    This can be represented as a collection of submatrices as:
2250 
2251 .vb
2252       A B C
2253       D E F
2254       G H I
2255 .ve
2256 
2257    Where the submatrices A,B,C are owned by proc0, D,E,F are
2258    owned by proc1, G,H,I are owned by proc2.
2259 
2260    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2261    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2262    The 'M','N' parameters are 8,8, and have the same values on all procs.
2263 
2264    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2265    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2266    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2267    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2268    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2269    matrix, and [DF] as another SeqAIJ matrix.
2270 
2271    When d_nz, o_nz parameters are specified, d_nz storage elements are
2272    allocated for every row of the local diagonal submatrix, and o_nz
2273    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2274    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2275    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2276    In this case, the values of d_nz,o_nz are:
2277 .vb
2278      proc0 : d_nz = 2, o_nz = 2
2279      proc1 : d_nz = 3, o_nz = 2
2280      proc2 : d_nz = 1, o_nz = 4
2281 .ve
2282    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2283    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2284    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2285    34 values.
2286 
2287    When d_nnz, o_nnz parameters are specified, the storage is specified
2288    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2289    In the above case the values for d_nnz,o_nnz are:
2290 .vb
2291      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2292      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2293      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2294 .ve
2295    Here the space allocated is the sum of all the above values, i.e., 34, and
2296    hence the preallocation is exact.
2297 
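   A minimal call sketch for proc0 in the example above; the communicator comm,
   the ierr variable, and the error checking are illustrative assumptions:
.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
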
2298    Level: intermediate
2299 
2300 .keywords: matrix, aij, compressed row, sparse, parallel
2301 
2302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2303 @*/
2304 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2305 {
2306   int ierr,size;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2311   if (size > 1) {
2312     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2313     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2314   } else {
2315     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2316     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2323 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2324 {
2325   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2326   PetscFunctionBegin;
2327   *Ad     = a->A;
2328   *Ao     = a->B;
2329   *colmap = a->garray;
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2335 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2336 {
2337   int        ierr;
2338   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2339 
2340   PetscFunctionBegin;
2341   if (coloring->ctype == IS_COLORING_LOCAL) {
2342     int        *allcolors,*colors,i;
2343     ISColoring ocoloring;
2344 
2345     /* set coloring for diagonal portion */
2346     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2347 
2348     /* set coloring for off-diagonal portion */
2349     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2350     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2351     for (i=0; i<a->B->n; i++) {
2352       colors[i] = allcolors[a->garray[i]];
2353     }
2354     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2355     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2356     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2357     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2358   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2359     int        *colors,i,*larray;
2360     ISColoring ocoloring;
2361 
2362     /* set coloring for diagonal portion */
2363     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2364     for (i=0; i<a->A->n; i++) {
2365       larray[i] = i + a->cstart;
2366     }
2367     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2368     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2369     for (i=0; i<a->A->n; i++) {
2370       colors[i] = coloring->colors[larray[i]];
2371     }
2372     ierr = PetscFree(larray);CHKERRQ(ierr);
2373     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2374     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2375     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2376 
2377     /* set coloring for off-diagonal portion */
2378     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2379     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2380     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2381     for (i=0; i<a->B->n; i++) {
2382       colors[i] = coloring->colors[larray[i]];
2383     }
2384     ierr = PetscFree(larray);CHKERRQ(ierr);
2385     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2386     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2387     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2388   } else {
2389     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2390   }
2391 
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2397 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2398 {
2399   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2400   int        ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2404   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2410 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2411 {
2412   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2413   int        ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2417   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatFileMerge"
2423 /*@C
2424       MatFileMerge - Saves a single large PETSc matrix by reading in
2425             sequential matrices from each processor and concatenating them
2426             together.
2427 
2428     Collective on MPI_Comm
2429 
2430    Input Parameters:
2431 +    comm - the communicator the individual files are on
2432 .    infile - name of input file
2433 -    outfile - name of output file
2434 
2435     Level: advanced
2436 
2437    Notes: The number of columns of the matrix in EACH of the separate files
2438       MUST be the same.
2439 
2440           The true name of the input file on each processor is obtained by
2441       appending .rank onto the infile string, where rank ranges from 0 up to
2442       one less than the number of processors.
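
          A minimal usage sketch; the file names here are illustrative assumptions:
.vb
     ierr = MatFileMerge(PETSC_COMM_WORLD,"matrix.dat","merged.dat");CHKERRQ(ierr);
.ve
      where each process reads the sequential matrix from matrix.dat.<rank> and the
      merged parallel matrix is written to merged.dat.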
2443 @*/
2444 int MatFileMerge(MPI_Comm comm,char *infile,char *outfile)
2445 {
2446   int         ierr,rank,len,m,n,i,rstart,*indx,nnz,I;
2447   PetscViewer in,out;
2448   char        *name;
2449   Mat         A,B;
2450   PetscScalar *values;
2451 
2452   PetscFunctionBegin;
2453   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2454   ierr = PetscStrlen(infile,&len);CHKERRQ(ierr);
2455   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2456   sprintf(name,"%s.%d",infile,rank);
2457   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr);
2458   ierr = PetscFree(name);CHKERRQ(ierr);
2459   ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr);
2460   ierr = PetscViewerDestroy(in);CHKERRQ(ierr);
2461 
2462   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
2463   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr);
2464   ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr);
2465   for (i=0;i<m;i++) {
2466     ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2467     I    = i + rstart;
2468     ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2469     ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
2470   }
2471   ierr = MatDestroy(A);CHKERRQ(ierr);
2472   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2473   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2474 
2475   ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2476   ierr = MatView(B,out);CHKERRQ(ierr);
2477   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2478   ierr = MatDestroy(B);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatFileSplit"
2484 int MatFileSplit(Mat A,char *outfile)
2485 {
2486   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2487   PetscViewer out;
2488   char        *name;
2489   Mat         B;
2490   PetscScalar *values;
2491 
2492   PetscFunctionBegin;
2493 
2494   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2495   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2496   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2497   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2498   for (i=0;i<m;i++) {
2499     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2500     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2501     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2502   }
2503   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2504   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2505 
2506   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2507   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2508   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2509   sprintf(name,"%s.%d",outfile,rank);
2510   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2511   ierr = PetscFree(name);CHKERRQ(ierr);
2512   ierr = MatView(B,out);CHKERRQ(ierr);
2513   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2514   ierr = MatDestroy(B);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517