xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1b1dd7ad34ebf6356801aebcdcd5dfae39b3e5f4)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory((PetscObject)mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
747   PetscInt      *owners = A->rmap->range;
748   PetscInt       n      = A->rmap->n;
749   PetscMPIInt    size   = mat->size;
750   PetscSF        sf;
751   PetscInt      *lrows;
752   PetscSFNode   *rrows;
753   PetscInt       lastidx = -1, r, p = 0, len = 0;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   /* Create SF where leaves are input rows and roots are owned rows */
758   ierr = PetscMalloc(n * sizeof(PetscInt), &lrows);CHKERRQ(ierr);
759   ierr = PetscMemzero(lrows, n * sizeof(PetscInt));CHKERRQ(ierr);
760   ierr = PetscMalloc(N * sizeof(PetscSFNode), &rrows);CHKERRQ(ierr);
761   for (r = 0; r < N; ++r) {
762     const PetscInt idx   = rows[r];
763     PetscBool      found = PETSC_FALSE;
764     /* Trick for efficient searching for sorted rows */
765     if (lastidx > idx) p = 0;
766     lastidx = idx;
767     for (; p < size; ++p) {
768       if (idx >= owners[p] && idx < owners[p+1]) {
769         rrows[r].rank  = p;
770         rrows[r].index = rows[r] - owners[p];
771         found = PETSC_TRUE;
772         break;
773       }
774     }
775     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
776   }
777   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
778   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
779   /* Collect flags for rows to be zeroed */
780   ierr = PetscSFReduceBegin(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr);
781   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
782   /* Compress and put in row numbers */
783   for (r = 0; r < n; ++r) if (lrows[r]) lrows[len++] = r;
784   /* fix right hand side if needed */
785   if (x && b) {
786     const PetscScalar *xx;
787     PetscScalar       *bb;
788 
789     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
791     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
792     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
793     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
794   }
795   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
796   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
797   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
798     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
799   } else if (diag != 0.0) {
800     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
801     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
802     for (r = 0; r < len; ++r) {
803       const PetscInt row = lrows[r] + A->rmap->rstart;
804       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
805     }
806     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808   } else {
809     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
810   }
811   ierr = PetscFree(lrows);CHKERRQ(ierr);
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
817 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
818 {
819   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
820   PetscErrorCode    ierr;
821   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
822   PetscInt          i,*owners = A->rmap->range;
823   PetscInt          *nprocs,j,idx,nsends;
824   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
825   PetscInt          *rvalues,count,base,slen,*source;
826   PetscInt          *lens,*lrows,*values,m;
827   MPI_Comm          comm;
828   MPI_Request       *send_waits,*recv_waits;
829   MPI_Status        recv_status,*send_status;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 #if defined(PETSC_DEBUG)
837   PetscBool found = PETSC_FALSE;
838 #endif
839 
840   PetscFunctionBegin;
841   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
842   /*  first count number of contributors to each processor */
843   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
844   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
845   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
846   j    = 0;
847   for (i=0; i<N; i++) {
848     if (lastidx > (idx = rows[i])) j = 0;
849     lastidx = idx;
850     for (; j<size; j++) {
851       if (idx >= owners[j] && idx < owners[j+1]) {
852         nprocs[2*j]++;
853         nprocs[2*j+1] = 1;
854         owner[i]      = j;
855 #if defined(PETSC_DEBUG)
856         found = PETSC_TRUE;
857 #endif
858         break;
859       }
860     }
861 #if defined(PETSC_DEBUG)
862     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
863     found = PETSC_FALSE;
864 #endif
865   }
866   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
867 
868   /* inform other processors of number of messages and max length*/
869   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
870 
871   /* post receives:   */
872   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
873   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
874   for (i=0; i<nrecvs; i++) {
875     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
876   }
877 
878   /* do sends:
879       1) starts[i] gives the starting index in svalues for stuff going to
880          the ith processor
881   */
882   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
883   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
884   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
885 
886   starts[0] = 0;
887   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
888   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
889 
890   starts[0] = 0;
891   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
892   count = 0;
893   for (i=0; i<size; i++) {
894     if (nprocs[2*i+1]) {
895       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
896     }
897   }
898   ierr = PetscFree(starts);CHKERRQ(ierr);
899 
900   base = owners[rank];
901 
902   /*  wait on receives */
903   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
904   count = nrecvs; slen = 0;
905   while (count) {
906     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
907     /* unpack receives into our local space */
908     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
909 
910     source[imdex] = recv_status.MPI_SOURCE;
911     lens[imdex]   = n;
912     slen         += n;
913     count--;
914   }
915   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
916 
917   /* move the data into the send scatter */
918   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
919   count = 0;
920   for (i=0; i<nrecvs; i++) {
921     values = rvalues + i*nmax;
922     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
923   }
924   ierr = PetscFree(rvalues);CHKERRQ(ierr);
925   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
926   ierr = PetscFree(owner);CHKERRQ(ierr);
927   ierr = PetscFree(nprocs);CHKERRQ(ierr);
928   /* lrows are the local rows to be zeroed, slen is the number of local rows */
929 
930   /* zero diagonal part of matrix */
931   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
932 
933   /* handle off diagonal part of matrix */
934   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
935   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
936   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
937   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
938   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
939   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
942   if (x) {
943     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
946     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
947   }
948   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
949 
950   /* remove zeroed rows of off diagonal matrix */
951   ii = aij->i;
952   for (i=0; i<slen; i++) {
953     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
954   }
955 
956   /* loop over all elements of off process part of matrix zeroing removed columns*/
957   if (aij->compressedrow.use) {
958     m    = aij->compressedrow.nrows;
959     ii   = aij->compressedrow.i;
960     ridx = aij->compressedrow.rindex;
961     for (i=0; i<m; i++) {
962       n  = ii[i+1] - ii[i];
963       aj = aij->j + ii[i];
964       aa = aij->a + ii[i];
965 
966       for (j=0; j<n; j++) {
967         if (PetscAbsScalar(mask[*aj])) {
968           if (b) bb[*ridx] -= *aa*xx[*aj];
969           *aa = 0.0;
970         }
971         aa++;
972         aj++;
973       }
974       ridx++;
975     }
976   } else { /* do not use compressed row format */
977     m = l->B->rmap->n;
978     for (i=0; i<m; i++) {
979       n  = ii[i+1] - ii[i];
980       aj = aij->j + ii[i];
981       aa = aij->a + ii[i];
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[i] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990     }
991   }
992   if (x) {
993     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
994     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
995   }
996   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
997   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
998   ierr = PetscFree(lrows);CHKERRQ(ierr);
999 
1000   /* wait on sends */
1001   if (nsends) {
1002     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1003     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1004     ierr = PetscFree(send_status);CHKERRQ(ierr);
1005   }
1006   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1007   ierr = PetscFree(svalues);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMult_MPIAIJ"
1013 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1016   PetscErrorCode ierr;
1017   PetscInt       nt;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1021   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1031 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035 
1036   PetscFunctionBegin;
1037   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1043 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1044 {
1045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1046   PetscErrorCode ierr;
1047 
1048   PetscFunctionBegin;
1049   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1051   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1058 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscBool      merged;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1066   /* do nondiagonal part */
1067   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1068   if (!merged) {
1069     /* send it on its way */
1070     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1071     /* do local part */
1072     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1073     /* receive remote parts: note this assumes the values are not actually */
1074     /* added in yy until the next line, */
1075     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   } else {
1077     /* do local part */
1078     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1079     /* send it on its way */
1080     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081     /* values actually were received in the Begin() but we need to call this nop */
1082     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1089 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1090 {
1091   MPI_Comm       comm;
1092   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1093   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1094   IS             Me,Notme;
1095   PetscErrorCode ierr;
1096   PetscInt       M,N,first,last,*notme,i;
1097   PetscMPIInt    size;
1098 
1099   PetscFunctionBegin;
1100   /* Easy test: symmetric diagonal block */
1101   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1102   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1103   if (!*f) PetscFunctionReturn(0);
1104   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1106   if (size == 1) PetscFunctionReturn(0);
1107 
1108   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1109   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1110   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1111   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1112   for (i=0; i<first; i++) notme[i] = i;
1113   for (i=last; i<M; i++) notme[i-last+first] = i;
1114   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1115   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1116   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1117   Aoff = Aoffs[0];
1118   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1119   Boff = Boffs[0];
1120   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1121   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1122   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1123   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1124   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1125   ierr = PetscFree(notme);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1131 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   /* do nondiagonal part */
1138   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1139   /* send it on its way */
1140   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   /* do local part */
1142   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1143   /* receive remote parts */
1144   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /*
1149   This only works correctly for square matrices where the subblock A->A is the
1150    diagonal block
1151 */
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1154 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1155 {
1156   PetscErrorCode ierr;
1157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1158 
1159   PetscFunctionBegin;
1160   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1161   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1162   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatScale_MPIAIJ"
1168 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1169 {
1170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1175   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatDestroy_MPIAIJ"
1181 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1182 {
1183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1184   PetscErrorCode ierr;
1185 
1186   PetscFunctionBegin;
1187 #if defined(PETSC_USE_LOG)
1188   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1189 #endif
1190   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1191   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1192   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1193   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1194 #if defined(PETSC_USE_CTABLE)
1195   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1196 #else
1197   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1198 #endif
1199   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1200   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1201   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1202   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1203   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1204   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1205 
1206   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1220 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1221 {
1222   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1223   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1224   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1225   PetscErrorCode ierr;
1226   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1227   int            fd;
1228   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1229   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1230   PetscScalar    *column_values;
1231   PetscInt       message_count,flowcontrolcount;
1232   FILE           *file;
1233 
1234   PetscFunctionBegin;
1235   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1236   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1237   nz   = A->nz + B->nz;
1238   if (!rank) {
1239     header[0] = MAT_FILE_CLASSID;
1240     header[1] = mat->rmap->N;
1241     header[2] = mat->cmap->N;
1242 
1243     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1245     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     /* get largest number of rows any processor has */
1247     rlen  = mat->rmap->n;
1248     range = mat->rmap->range;
1249     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1250   } else {
1251     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     rlen = mat->rmap->n;
1253   }
1254 
1255   /* load up the local row counts */
1256   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1257   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1258 
1259   /* store the row lengths to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1263     for (i=1; i<size; i++) {
1264       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1265       rlen = range[i+1] - range[i];
1266       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1268     }
1269     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1270   } else {
1271     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1272     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1274   }
1275   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1276 
1277   /* load up the local column indices */
1278   nzmax = nz; /* th processor needs space a largest processor needs */
1279   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1281   cnt   = 0;
1282   for (i=0; i<mat->rmap->n; i++) {
1283     for (j=B->i[i]; j<B->i[i+1]; j++) {
1284       if ((col = garray[B->j[j]]) > cstart) break;
1285       column_indices[cnt++] = col;
1286     }
1287     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1288     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1289   }
1290   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1291 
1292   /* store the column indices to the file */
1293   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1294   if (!rank) {
1295     MPI_Status status;
1296     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1297     for (i=1; i<size; i++) {
1298       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1299       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1300       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1301       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1303     }
1304     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1305   } else {
1306     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1307     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1308     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1310   }
1311   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1312 
1313   /* load up the local column values */
1314   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1315   cnt  = 0;
1316   for (i=0; i<mat->rmap->n; i++) {
1317     for (j=B->i[i]; j<B->i[i+1]; j++) {
1318       if (garray[B->j[j]] > cstart) break;
1319       column_values[cnt++] = B->a[j];
1320     }
1321     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1322     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1323   }
1324   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1325 
1326   /* store the column values to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     MPI_Status status;
1330     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1331     for (i=1; i<size; i++) {
1332       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1333       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1334       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1335       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1337     }
1338     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1339   } else {
1340     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1341     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1343     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1344   }
1345   ierr = PetscFree(column_values);CHKERRQ(ierr);
1346 
1347   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1348   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #include <petscdraw.h>
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1355 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1356 {
1357   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1358   PetscErrorCode    ierr;
1359   PetscMPIInt       rank = aij->rank,size = aij->size;
1360   PetscBool         isdraw,iascii,isbinary;
1361   PetscViewer       sviewer;
1362   PetscViewerFormat format;
1363 
1364   PetscFunctionBegin;
1365   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1366   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1367   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1368   if (iascii) {
1369     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1370     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1371       MatInfo   info;
1372       PetscBool inodes;
1373 
1374       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1375       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1376       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1377       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1378       if (!inodes) {
1379         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1380                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1381       } else {
1382         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1383                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1384       }
1385       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1386       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1387       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1388       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1389       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1390       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1391       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1392       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1393       PetscFunctionReturn(0);
1394     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1395       PetscInt inodecount,inodelimit,*inodes;
1396       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1397       if (inodes) {
1398         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1399       } else {
1400         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1401       }
1402       PetscFunctionReturn(0);
1403     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1404       PetscFunctionReturn(0);
1405     }
1406   } else if (isbinary) {
1407     if (size == 1) {
1408       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1409       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1410     } else {
1411       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1412     }
1413     PetscFunctionReturn(0);
1414   } else if (isdraw) {
1415     PetscDraw draw;
1416     PetscBool isnull;
1417     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1418     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1419   }
1420 
1421   if (size == 1) {
1422     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1423     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1424   } else {
1425     /* assemble the entire matrix onto first processor. */
1426     Mat        A;
1427     Mat_SeqAIJ *Aloc;
1428     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1429     MatScalar  *a;
1430 
1431     if (mat->rmap->N > 1024) {
1432       PetscBool flg = PETSC_FALSE;
1433 
1434       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1435       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1436     }
1437 
1438     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1439     if (!rank) {
1440       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1443     }
1444     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1445     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1446     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1447     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1448     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1449 
1450     /* copy over the A part */
1451     Aloc = (Mat_SeqAIJ*)aij->A->data;
1452     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1453     row  = mat->rmap->rstart;
1454     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1455     for (i=0; i<m; i++) {
1456       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1457       row++;
1458       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459     }
1460     aj = Aloc->j;
1461     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1462 
1463     /* copy over the B part */
1464     Aloc = (Mat_SeqAIJ*)aij->B->data;
1465     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1466     row  = mat->rmap->rstart;
1467     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468     ct   = cols;
1469     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1470     for (i=0; i<m; i++) {
1471       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1472       row++;
1473       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1474     }
1475     ierr = PetscFree(ct);CHKERRQ(ierr);
1476     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478     /*
1479        Everyone has to call to draw the matrix since the graphics waits are
1480        synchronized across all processors that share the PetscDraw object
1481     */
1482     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1483     if (!rank) {
1484       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1486       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1487       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1488     }
1489     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1490     ierr = MatDestroy(&A);CHKERRQ(ierr);
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatView_MPIAIJ"
1497 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1498 {
1499   PetscErrorCode ierr;
1500   PetscBool      iascii,isdraw,issocket,isbinary;
1501 
1502   PetscFunctionBegin;
1503   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1504   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1505   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1506   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1507   if (iascii || isdraw || isbinary || issocket) {
1508     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatSOR_MPIAIJ"
1515 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1516 {
1517   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1518   PetscErrorCode ierr;
1519   Vec            bb1 = 0;
1520   PetscBool      hasop;
1521 
1522   PetscFunctionBegin;
1523   if (flag == SOR_APPLY_UPPER) {
1524     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1525     PetscFunctionReturn(0);
1526   }
1527 
1528   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1529     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1530   }
1531 
1532   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1533     if (flag & SOR_ZERO_INITIAL_GUESS) {
1534       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1535       its--;
1536     }
1537 
1538     while (its--) {
1539       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541 
1542       /* update rhs: bb1 = bb - B*x */
1543       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1544       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1545 
1546       /* local sweep */
1547       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1548     }
1549   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554     while (its--) {
1555       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557 
1558       /* update rhs: bb1 = bb - B*x */
1559       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1560       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1561 
1562       /* local sweep */
1563       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1564     }
1565   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1566     if (flag & SOR_ZERO_INITIAL_GUESS) {
1567       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1568       its--;
1569     }
1570     while (its--) {
1571       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573 
1574       /* update rhs: bb1 = bb - B*x */
1575       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1576       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1577 
1578       /* local sweep */
1579       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1580     }
1581   } else if (flag & SOR_EISENSTAT) {
1582     Vec xx1;
1583 
1584     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1585     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1586 
1587     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589     if (!mat->diag) {
1590       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1591       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1592     }
1593     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1594     if (hasop) {
1595       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1596     } else {
1597       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1598     }
1599     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1600 
1601     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1602 
1603     /* local sweep */
1604     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1605     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1606     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1607   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1608 
1609   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatPermute_MPIAIJ"
1615 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1616 {
1617   Mat            aA,aB,Aperm;
1618   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1619   PetscScalar    *aa,*ba;
1620   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1621   PetscSF        rowsf,sf;
1622   IS             parcolp = NULL;
1623   PetscBool      done;
1624   PetscErrorCode ierr;
1625 
1626   PetscFunctionBegin;
1627   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1628   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1629   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1630   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1631 
1632   /* Invert row permutation to find out where my rows should go */
1633   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1634   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1635   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1636   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1637   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1638   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1639 
1640   /* Invert column permutation to find out where my columns should go */
1641   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1642   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1643   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1644   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1645   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1646   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1647   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1648 
1649   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1650   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1651   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1652 
1653   /* Find out where my gcols should go */
1654   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1655   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1656   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1657   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1658   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1659   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1660   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1661   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1662 
1663   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1664   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1665   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1666   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1667   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1668   for (i=0; i<m; i++) {
1669     PetscInt row = rdest[i],rowner;
1670     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1671     for (j=ai[i]; j<ai[i+1]; j++) {
1672       PetscInt cowner,col = cdest[aj[j]];
1673       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1674       if (rowner == cowner) dnnz[i]++;
1675       else onnz[i]++;
1676     }
1677     for (j=bi[i]; j<bi[i+1]; j++) {
1678       PetscInt cowner,col = gcdest[bj[j]];
1679       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1680       if (rowner == cowner) dnnz[i]++;
1681       else onnz[i]++;
1682     }
1683   }
1684   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1685   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1686   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1687   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1688   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1689   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1690   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1691 
1692   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1693   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1694   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1695   for (i=0; i<m; i++) {
1696     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1697     PetscInt rowlen;
1698     rowlen = ai[i+1] - ai[i];
1699     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1700     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1701     rowlen = bi[i+1] - bi[i];
1702     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1703     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1704   }
1705   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1706   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1708   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1709   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1710   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1711   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1712   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1713   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1714   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1715   *B = Aperm;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1721 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1722 {
1723   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1724   Mat            A    = mat->A,B = mat->B;
1725   PetscErrorCode ierr;
1726   PetscReal      isend[5],irecv[5];
1727 
1728   PetscFunctionBegin;
1729   info->block_size = 1.0;
1730   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1731 
1732   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1733   isend[3] = info->memory;  isend[4] = info->mallocs;
1734 
1735   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1736 
1737   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1738   isend[3] += info->memory;  isend[4] += info->mallocs;
1739   if (flag == MAT_LOCAL) {
1740     info->nz_used      = isend[0];
1741     info->nz_allocated = isend[1];
1742     info->nz_unneeded  = isend[2];
1743     info->memory       = isend[3];
1744     info->mallocs      = isend[4];
1745   } else if (flag == MAT_GLOBAL_MAX) {
1746     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1747 
1748     info->nz_used      = irecv[0];
1749     info->nz_allocated = irecv[1];
1750     info->nz_unneeded  = irecv[2];
1751     info->memory       = irecv[3];
1752     info->mallocs      = irecv[4];
1753   } else if (flag == MAT_GLOBAL_SUM) {
1754     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1755 
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   }
1762   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1763   info->fill_ratio_needed = 0;
1764   info->factor_mallocs    = 0;
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatSetOption_MPIAIJ"
1770 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1771 {
1772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   switch (op) {
1777   case MAT_NEW_NONZERO_LOCATIONS:
1778   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1779   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1780   case MAT_KEEP_NONZERO_PATTERN:
1781   case MAT_NEW_NONZERO_LOCATION_ERR:
1782   case MAT_USE_INODES:
1783   case MAT_IGNORE_ZERO_ENTRIES:
1784     MatCheckPreallocated(A,1);
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_ROW_ORIENTED:
1789     a->roworiented = flg;
1790 
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_NEW_DIAGONALS:
1795     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1796     break;
1797   case MAT_IGNORE_OFF_PROC_ENTRIES:
1798     a->donotstash = flg;
1799     break;
1800   case MAT_SPD:
1801     A->spd_set = PETSC_TRUE;
1802     A->spd     = flg;
1803     if (flg) {
1804       A->symmetric                  = PETSC_TRUE;
1805       A->structurally_symmetric     = PETSC_TRUE;
1806       A->symmetric_set              = PETSC_TRUE;
1807       A->structurally_symmetric_set = PETSC_TRUE;
1808     }
1809     break;
1810   case MAT_SYMMETRIC:
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_STRUCTURALLY_SYMMETRIC:
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_HERMITIAN:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_SYMMETRY_ETERNAL:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   default:
1823     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1824   }
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatGetRow_MPIAIJ"
1830 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1831 {
1832   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1833   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1834   PetscErrorCode ierr;
1835   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1836   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1837   PetscInt       *cmap,*idx_p;
1838 
1839   PetscFunctionBegin;
1840   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1841   mat->getrowactive = PETSC_TRUE;
1842 
1843   if (!mat->rowvalues && (idx || v)) {
1844     /*
1845         allocate enough space to hold information from the longest row.
1846     */
1847     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1848     PetscInt   max = 1,tmp;
1849     for (i=0; i<matin->rmap->n; i++) {
1850       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1851       if (max < tmp) max = tmp;
1852     }
1853     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1854   }
1855 
1856   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1857   lrow = row - rstart;
1858 
1859   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1860   if (!v)   {pvA = 0; pvB = 0;}
1861   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1862   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1863   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1864   nztot = nzA + nzB;
1865 
1866   cmap = mat->garray;
1867   if (v  || idx) {
1868     if (nztot) {
1869       /* Sort by increasing column numbers, assuming A and B already sorted */
1870       PetscInt imark = -1;
1871       if (v) {
1872         *v = v_p = mat->rowvalues;
1873         for (i=0; i<nzB; i++) {
1874           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1875           else break;
1876         }
1877         imark = i;
1878         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1879         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1880       }
1881       if (idx) {
1882         *idx = idx_p = mat->rowindices;
1883         if (imark > -1) {
1884           for (i=0; i<imark; i++) {
1885             idx_p[i] = cmap[cworkB[i]];
1886           }
1887         } else {
1888           for (i=0; i<nzB; i++) {
1889             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1890             else break;
1891           }
1892           imark = i;
1893         }
1894         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1895         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1896       }
1897     } else {
1898       if (idx) *idx = 0;
1899       if (v)   *v   = 0;
1900     }
1901   }
1902   *nz  = nztot;
1903   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1904   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1910 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1911 {
1912   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1913 
1914   PetscFunctionBegin;
1915   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1916   aij->getrowactive = PETSC_FALSE;
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatNorm_MPIAIJ"
1922 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1923 {
1924   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1925   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1926   PetscErrorCode ierr;
1927   PetscInt       i,j,cstart = mat->cmap->rstart;
1928   PetscReal      sum = 0.0;
1929   MatScalar      *v;
1930 
1931   PetscFunctionBegin;
1932   if (aij->size == 1) {
1933     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1934   } else {
1935     if (type == NORM_FROBENIUS) {
1936       v = amat->a;
1937       for (i=0; i<amat->nz; i++) {
1938         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1939       }
1940       v = bmat->a;
1941       for (i=0; i<bmat->nz; i++) {
1942         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1943       }
1944       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1945       *norm = PetscSqrtReal(*norm);
1946     } else if (type == NORM_1) { /* max column norm */
1947       PetscReal *tmp,*tmp2;
1948       PetscInt  *jj,*garray = aij->garray;
1949       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1950       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1951       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1952       *norm = 0.0;
1953       v     = amat->a; jj = amat->j;
1954       for (j=0; j<amat->nz; j++) {
1955         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1956       }
1957       v = bmat->a; jj = bmat->j;
1958       for (j=0; j<bmat->nz; j++) {
1959         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1960       }
1961       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1962       for (j=0; j<mat->cmap->N; j++) {
1963         if (tmp2[j] > *norm) *norm = tmp2[j];
1964       }
1965       ierr = PetscFree(tmp);CHKERRQ(ierr);
1966       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1967     } else if (type == NORM_INFINITY) { /* max row norm */
1968       PetscReal ntemp = 0.0;
1969       for (j=0; j<aij->A->rmap->n; j++) {
1970         v   = amat->a + amat->i[j];
1971         sum = 0.0;
1972         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1973           sum += PetscAbsScalar(*v); v++;
1974         }
1975         v = bmat->a + bmat->i[j];
1976         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1977           sum += PetscAbsScalar(*v); v++;
1978         }
1979         if (sum > ntemp) ntemp = sum;
1980       }
1981       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1982     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatTranspose_MPIAIJ"
1989 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1990 {
1991   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1992   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1993   PetscErrorCode ierr;
1994   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1995   PetscInt       cstart = A->cmap->rstart,ncol;
1996   Mat            B;
1997   MatScalar      *array;
1998 
1999   PetscFunctionBegin;
2000   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2001 
2002   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2003   ai = Aloc->i; aj = Aloc->j;
2004   bi = Bloc->i; bj = Bloc->j;
2005   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2006     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2007     PetscSFNode          *oloc;
2008     PETSC_UNUSED PetscSF sf;
2009 
2010     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2011     /* compute d_nnz for preallocation */
2012     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) {
2014       d_nnz[aj[i]]++;
2015       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2016     }
2017     /* compute local off-diagonal contributions */
2018     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2019     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2020     /* map those to global */
2021     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2022     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2023     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2024     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2025     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2026     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2027     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2028 
2029     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2030     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2031     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2032     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2033     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2034     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2035   } else {
2036     B    = *matout;
2037     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2038     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2039   }
2040 
2041   /* copy over the A part */
2042   array = Aloc->a;
2043   row   = A->rmap->rstart;
2044   for (i=0; i<ma; i++) {
2045     ncol = ai[i+1]-ai[i];
2046     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2047     row++;
2048     array += ncol; aj += ncol;
2049   }
2050   aj = Aloc->j;
2051   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2052 
2053   /* copy over the B part */
2054   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2055   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2056   array = Bloc->a;
2057   row   = A->rmap->rstart;
2058   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2059   cols_tmp = cols;
2060   for (i=0; i<mb; i++) {
2061     ncol = bi[i+1]-bi[i];
2062     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2063     row++;
2064     array += ncol; cols_tmp += ncol;
2065   }
2066   ierr = PetscFree(cols);CHKERRQ(ierr);
2067 
2068   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2071     *matout = B;
2072   } else {
2073     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2080 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2081 {
2082   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2083   Mat            a    = aij->A,b = aij->B;
2084   PetscErrorCode ierr;
2085   PetscInt       s1,s2,s3;
2086 
2087   PetscFunctionBegin;
2088   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2089   if (rr) {
2090     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2091     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2092     /* Overlap communication with computation. */
2093     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2094   }
2095   if (ll) {
2096     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2097     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2098     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2099   }
2100   /* scale  the diagonal block */
2101   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2102 
2103   if (rr) {
2104     /* Do a scatter end and then right scale the off-diagonal block */
2105     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2107   }
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2114 {
2115   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatEqual_MPIAIJ"
2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2126 {
2127   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2128   Mat            a,b,c,d;
2129   PetscBool      flg;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   a = matA->A; b = matA->B;
2134   c = matB->A; d = matB->B;
2135 
2136   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2137   if (flg) {
2138     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2139   }
2140   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatCopy_MPIAIJ"
2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2147 {
2148   PetscErrorCode ierr;
2149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2150   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2151 
2152   PetscFunctionBegin;
2153   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2154   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2155     /* because of the column compression in the off-processor part of the matrix a->B,
2156        the number of columns in a->B and b->B may be different, hence we cannot call
2157        the MatCopy() directly on the two parts. If need be, we can provide a more
2158        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2159        then copying the submatrices */
2160     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2161   } else {
2162     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2163     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatSetUp_MPIAIJ"
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2183 {
2184   PetscInt       i,m=Y->rmap->N;
2185   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2186   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2187   const PetscInt *xi = x->i,*yi = y->i;
2188 
2189   PetscFunctionBegin;
2190   /* Set the number of nonzeros in the new matrix */
2191   for (i=0; i<m; i++) {
2192     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2193     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2194     nnz[i] = 0;
2195     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2196       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2197       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2198       nnz[i]++;
2199     }
2200     for (; k<nzy; k++) nnz[i]++;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatAXPY_MPIAIJ"
2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2208 {
2209   PetscErrorCode ierr;
2210   PetscInt       i;
2211   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2212   PetscBLASInt   bnz,one=1;
2213   Mat_SeqAIJ     *x,*y;
2214 
2215   PetscFunctionBegin;
2216   if (str == SAME_NONZERO_PATTERN) {
2217     PetscScalar alpha = a;
2218     x    = (Mat_SeqAIJ*)xx->A->data;
2219     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2220     y    = (Mat_SeqAIJ*)yy->A->data;
2221     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2222     x    = (Mat_SeqAIJ*)xx->B->data;
2223     y    = (Mat_SeqAIJ*)yy->B->data;
2224     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2225     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2226   } else if (str == SUBSET_NONZERO_PATTERN) {
2227     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2228 
2229     x = (Mat_SeqAIJ*)xx->B->data;
2230     y = (Mat_SeqAIJ*)yy->B->data;
2231     if (y->xtoy && y->XtoY != xx->B) {
2232       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2233       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2234     }
2235     if (!y->xtoy) { /* get xtoy */
2236       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2237       y->XtoY = xx->B;
2238       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2239     }
2240     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2241   } else {
2242     Mat      B;
2243     PetscInt *nnz_d,*nnz_o;
2244     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2245     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2246     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2247     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2248     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2249     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatConjugate_MPIAIJ"
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatRealPart_MPIAIJ"
2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2290   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2297 {
2298   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2303   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #if defined(PETSC_HAVE_PBGL)
2308 
2309 #include <boost/parallel/mpi/bsp_process_group.hpp>
2310 #include <boost/graph/distributed/ilu_default_graph.hpp>
2311 #include <boost/graph/distributed/ilu_0_block.hpp>
2312 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2313 #include <boost/graph/distributed/petsc/interface.hpp>
2314 #include <boost/multi_array.hpp>
2315 #include <boost/parallel/distributed_property_map->hpp>
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2319 /*
2320   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2321 */
2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2323 {
2324   namespace petsc = boost::distributed::petsc;
2325 
2326   namespace graph_dist = boost::graph::distributed;
2327   using boost::graph::distributed::ilu_default::process_group_type;
2328   using boost::graph::ilu_permuted;
2329 
2330   PetscBool      row_identity, col_identity;
2331   PetscContainer c;
2332   PetscInt       m, n, M, N;
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2337   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2338   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2339   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2340 
2341   process_group_type pg;
2342   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2343   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2344   lgraph_type& level_graph = *lgraph_p;
2345   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2346 
2347   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2348   ilu_permuted(level_graph);
2349 
2350   /* put together the new matrix */
2351   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2352   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2353   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2354   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2355   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2356   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2357   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359 
2360   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2361   ierr = PetscContainerSetPointer(c, lgraph_p);
2362   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2363   ierr = PetscContainerDestroy(&c);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2369 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2370 {
2371   PetscFunctionBegin;
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatSolve_MPIAIJ"
2377 /*
2378   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2379 */
2380 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2381 {
2382   namespace graph_dist = boost::graph::distributed;
2383 
2384   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2385   lgraph_type    *lgraph_p;
2386   PetscContainer c;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2391   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2392   ierr = VecCopy(b, x);CHKERRQ(ierr);
2393 
2394   PetscScalar *array_x;
2395   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2396   PetscInt sx;
2397   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2398 
2399   PetscScalar *array_b;
2400   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2401   PetscInt sb;
2402   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2403 
2404   lgraph_type& level_graph = *lgraph_p;
2405   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2406 
2407   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2408   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2409   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2410 
2411   typedef boost::iterator_property_map<array_ref_type::iterator,
2412                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2413   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2414   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2415 
2416   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2417   PetscFunctionReturn(0);
2418 }
2419 #endif
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatDestroy_MatRedundant"
2423 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2424 {
2425   PetscErrorCode ierr;
2426   Mat_Redundant  *redund;
2427   PetscInt       i;
2428   PetscMPIInt    size;
2429 
2430   PetscFunctionBegin;
2431   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2432   if (size == 1) {
2433     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2434     redund = a->redundant;
2435   } else {
2436     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2437     redund = a->redundant;
2438   }
2439   if (redund){
2440     if (redund->matseq) { /* via MatGetSubMatrices()  */
2441       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2442       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2443       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2444       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2445     } else {
2446       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2447       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2448       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2449       for (i=0; i<redund->nrecvs; i++) {
2450         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2451         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2452       }
2453       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2454     }
2455 
2456     if (redund->psubcomm) {
2457       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2458     }
2459     ierr = redund->Destroy(A);CHKERRQ(ierr);
2460     ierr = PetscFree(redund);CHKERRQ(ierr);
2461   }
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2467 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2468 {
2469   PetscMPIInt    rank,size;
2470   MPI_Comm       comm;
2471   PetscErrorCode ierr;
2472   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2473   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2474   PetscInt       *rowrange = mat->rmap->range;
2475   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2476   Mat            A = aij->A,B=aij->B,C=*matredundant;
2477   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2478   PetscScalar    *sbuf_a;
2479   PetscInt       nzlocal=a->nz+b->nz;
2480   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2481   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2482   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2483   MatScalar      *aworkA,*aworkB;
2484   PetscScalar    *vals;
2485   PetscMPIInt    tag1,tag2,tag3,imdex;
2486   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2487   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2488   MPI_Status     recv_status,*send_status;
2489   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2490   PetscInt       **rbuf_j=NULL;
2491   PetscScalar    **rbuf_a=NULL;
2492   Mat_Redundant  *redund =NULL;
2493 
2494   PetscFunctionBegin;
2495   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2496   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2497   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2498   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2499   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2500 
2501   if (reuse == MAT_REUSE_MATRIX) {
2502     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2503     if (subsize == 1) {
2504       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2505       redund = c->redundant;
2506     } else {
2507       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2508       redund = c->redundant;
2509     }
2510     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2511 
2512     nsends    = redund->nsends;
2513     nrecvs    = redund->nrecvs;
2514     send_rank = redund->send_rank;
2515     recv_rank = redund->recv_rank;
2516     sbuf_nz   = redund->sbuf_nz;
2517     rbuf_nz   = redund->rbuf_nz;
2518     sbuf_j    = redund->sbuf_j;
2519     sbuf_a    = redund->sbuf_a;
2520     rbuf_j    = redund->rbuf_j;
2521     rbuf_a    = redund->rbuf_a;
2522   }
2523 
2524   if (reuse == MAT_INITIAL_MATRIX) {
2525     PetscInt    nleftover,np_subcomm;
2526 
2527     /* get the destination processors' id send_rank, nsends and nrecvs */
2528     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2529 
2530     np_subcomm = size/nsubcomm;
2531     nleftover  = size - nsubcomm*np_subcomm;
2532 
2533     /* block of codes below is specific for INTERLACED */
2534     /* ------------------------------------------------*/
2535     nsends = 0; nrecvs = 0;
2536     for (i=0; i<size; i++) {
2537       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2538         send_rank[nsends++] = i;
2539         recv_rank[nrecvs++] = i;
2540       }
2541     }
2542     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2543       i = size-nleftover-1;
2544       j = 0;
2545       while (j < nsubcomm - nleftover) {
2546         send_rank[nsends++] = i;
2547         i--; j++;
2548       }
2549     }
2550 
2551     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2552       for (i=0; i<nleftover; i++) {
2553         recv_rank[nrecvs++] = size-nleftover+i;
2554       }
2555     }
2556     /*----------------------------------------------*/
2557 
2558     /* allocate sbuf_j, sbuf_a */
2559     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2560     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2561     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2562     /*
2563     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2564     ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
2565      */
2566   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2567 
2568   /* copy mat's local entries into the buffers */
2569   if (reuse == MAT_INITIAL_MATRIX) {
2570     rownz_max = 0;
2571     rptr      = sbuf_j;
2572     cols      = sbuf_j + rend-rstart + 1;
2573     vals      = sbuf_a;
2574     rptr[0]   = 0;
2575     for (i=0; i<rend-rstart; i++) {
2576       row    = i + rstart;
2577       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2578       ncols  = nzA + nzB;
2579       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2580       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2581       /* load the column indices for this row into cols */
2582       lwrite = 0;
2583       for (l=0; l<nzB; l++) {
2584         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2585           vals[lwrite]   = aworkB[l];
2586           cols[lwrite++] = ctmp;
2587         }
2588       }
2589       for (l=0; l<nzA; l++) {
2590         vals[lwrite]   = aworkA[l];
2591         cols[lwrite++] = cstart + cworkA[l];
2592       }
2593       for (l=0; l<nzB; l++) {
2594         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2595           vals[lwrite]   = aworkB[l];
2596           cols[lwrite++] = ctmp;
2597         }
2598       }
2599       vals     += ncols;
2600       cols     += ncols;
2601       rptr[i+1] = rptr[i] + ncols;
2602       if (rownz_max < ncols) rownz_max = ncols;
2603     }
2604     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2605   } else { /* only copy matrix values into sbuf_a */
2606     rptr    = sbuf_j;
2607     vals    = sbuf_a;
2608     rptr[0] = 0;
2609     for (i=0; i<rend-rstart; i++) {
2610       row    = i + rstart;
2611       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2612       ncols  = nzA + nzB;
2613       cworkB = b->j + b->i[i];
2614       aworkA = a->a + a->i[i];
2615       aworkB = b->a + b->i[i];
2616       lwrite = 0;
2617       for (l=0; l<nzB; l++) {
2618         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2619       }
2620       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2621       for (l=0; l<nzB; l++) {
2622         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2623       }
2624       vals     += ncols;
2625       rptr[i+1] = rptr[i] + ncols;
2626     }
2627   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2628 
2629   /* send nzlocal to others, and recv other's nzlocal */
2630   /*--------------------------------------------------*/
2631   if (reuse == MAT_INITIAL_MATRIX) {
2632     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2633 
2634     s_waits2 = s_waits3 + nsends;
2635     s_waits1 = s_waits2 + nsends;
2636     r_waits1 = s_waits1 + nsends;
2637     r_waits2 = r_waits1 + nrecvs;
2638     r_waits3 = r_waits2 + nrecvs;
2639   } else {
2640     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2641 
2642     r_waits3 = s_waits3 + nsends;
2643   }
2644 
2645   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2646   if (reuse == MAT_INITIAL_MATRIX) {
2647     /* get new tags to keep the communication clean */
2648     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2649     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2650     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2651 
2652     /* post receives of other's nzlocal */
2653     for (i=0; i<nrecvs; i++) {
2654       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* send nzlocal to others */
2657     for (i=0; i<nsends; i++) {
2658       sbuf_nz[i] = nzlocal;
2659       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2660     }
2661     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2662     count = nrecvs;
2663     while (count) {
2664       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2665 
2666       recv_rank[imdex] = recv_status.MPI_SOURCE;
2667       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2668       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2669 
2670       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2671 
2672       rbuf_nz[imdex] += i + 2;
2673 
2674       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2675       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2676       count--;
2677     }
2678     /* wait on sends of nzlocal */
2679     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2680     /* send mat->i,j to others, and recv from other's */
2681     /*------------------------------------------------*/
2682     for (i=0; i<nsends; i++) {
2683       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2684       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2685     }
2686     /* wait on receives of mat->i,j */
2687     /*------------------------------*/
2688     count = nrecvs;
2689     while (count) {
2690       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2691       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2692       count--;
2693     }
2694     /* wait on sends of mat->i,j */
2695     /*---------------------------*/
2696     if (nsends) {
2697       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2698     }
2699   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2700 
2701   /* post receives, send and receive mat->a */
2702   /*----------------------------------------*/
2703   for (imdex=0; imdex<nrecvs; imdex++) {
2704     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2705   }
2706   for (i=0; i<nsends; i++) {
2707     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2708   }
2709   count = nrecvs;
2710   while (count) {
2711     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2712     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2713     count--;
2714   }
2715   if (nsends) {
2716     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2717   }
2718 
2719   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2720 
2721   /* create redundant matrix */
2722   /*-------------------------*/
2723   if (reuse == MAT_INITIAL_MATRIX) {
2724     const PetscInt *range;
2725     PetscInt       rstart_sub,rend_sub,mloc_sub;
2726 
2727     /* compute rownz_max for preallocation */
2728     for (imdex=0; imdex<nrecvs; imdex++) {
2729       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2730       rptr = rbuf_j[imdex];
2731       for (i=0; i<j; i++) {
2732         ncols = rptr[i+1] - rptr[i];
2733         if (rownz_max < ncols) rownz_max = ncols;
2734       }
2735     }
2736 
2737     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2738 
2739     /* get local size of redundant matrix
2740        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2741     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2742     rstart_sub = range[nsubcomm*subrank];
2743     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2744       rend_sub = range[nsubcomm*(subrank+1)];
2745     } else {
2746       rend_sub = mat->rmap->N;
2747     }
2748     mloc_sub = rend_sub - rstart_sub;
2749 
2750     if (M == N) {
2751       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2752     } else { /* non-square matrix */
2753       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2754     }
2755     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2756     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2757     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2758     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2759   } else {
2760     C = *matredundant;
2761   }
2762 
2763   /* insert local matrix entries */
2764   rptr = sbuf_j;
2765   cols = sbuf_j + rend-rstart + 1;
2766   vals = sbuf_a;
2767   for (i=0; i<rend-rstart; i++) {
2768     row   = i + rstart;
2769     ncols = rptr[i+1] - rptr[i];
2770     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2771     vals += ncols;
2772     cols += ncols;
2773   }
2774   /* insert received matrix entries */
2775   for (imdex=0; imdex<nrecvs; imdex++) {
2776     rstart = rowrange[recv_rank[imdex]];
2777     rend   = rowrange[recv_rank[imdex]+1];
2778     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2779     rptr   = rbuf_j[imdex];
2780     cols   = rbuf_j[imdex] + rend-rstart + 1;
2781     vals   = rbuf_a[imdex];
2782     for (i=0; i<rend-rstart; i++) {
2783       row   = i + rstart;
2784       ncols = rptr[i+1] - rptr[i];
2785       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2786       vals += ncols;
2787       cols += ncols;
2788     }
2789   }
2790   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792 
2793   if (reuse == MAT_INITIAL_MATRIX) {
2794     *matredundant = C;
2795 
2796     /* create a supporting struct and attach it to C for reuse */
2797     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2798     if (subsize == 1) {
2799       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2800       c->redundant = redund;
2801     } else {
2802       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2803       c->redundant = redund;
2804     }
2805 
2806     redund->nzlocal   = nzlocal;
2807     redund->nsends    = nsends;
2808     redund->nrecvs    = nrecvs;
2809     redund->send_rank = send_rank;
2810     redund->recv_rank = recv_rank;
2811     redund->sbuf_nz   = sbuf_nz;
2812     redund->rbuf_nz   = rbuf_nz;
2813     redund->sbuf_j    = sbuf_j;
2814     redund->sbuf_a    = sbuf_a;
2815     redund->rbuf_j    = rbuf_j;
2816     redund->rbuf_a    = rbuf_a;
2817     redund->psubcomm  = NULL;
2818 
2819     redund->Destroy = C->ops->destroy;
2820     C->ops->destroy = MatDestroy_MatRedundant;
2821   }
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2827 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2828 {
2829   PetscErrorCode ierr;
2830   MPI_Comm       comm;
2831   PetscMPIInt    size,subsize;
2832   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2833   Mat_Redundant  *redund=NULL;
2834   PetscSubcomm   psubcomm=NULL;
2835   MPI_Comm       subcomm_in=subcomm;
2836   Mat            *matseq;
2837   IS             isrow,iscol;
2838 
2839   PetscFunctionBegin;
2840   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2841     if (reuse ==  MAT_INITIAL_MATRIX) {
2842       /* create psubcomm, then get subcomm */
2843       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2844       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2845       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2846 
2847       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2848       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2849       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2850       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2851       subcomm = psubcomm->comm;
2852     } else { /* retrieve psubcomm and subcomm */
2853       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2854       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2855       if (subsize == 1) {
2856         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2857         redund = c->redundant;
2858       } else {
2859         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2860         redund = c->redundant;
2861       }
2862       psubcomm = redund->psubcomm;
2863     }
2864     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2865       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2866       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2867         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2868         if (subsize == 1) {
2869           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2870           c->redundant->psubcomm = psubcomm;
2871         } else {
2872           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2873           c->redundant->psubcomm = psubcomm ;
2874         }
2875       }
2876       PetscFunctionReturn(0);
2877     }
2878   }
2879 
2880   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2881   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2882   if (reuse == MAT_INITIAL_MATRIX) {
2883     /* create a local sequential matrix matseq[0] */
2884     mloc_sub = PETSC_DECIDE;
2885     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2886     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2887     rstart = rend - mloc_sub;
2888     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2889     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2890   } else { /* reuse == MAT_REUSE_MATRIX */
2891     if (subsize == 1) {
2892       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2893       redund = c->redundant;
2894     } else {
2895       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2896       redund = c->redundant;
2897     }
2898 
2899     isrow  = redund->isrow;
2900     iscol  = redund->iscol;
2901     matseq = redund->matseq;
2902   }
2903   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2904   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2905 
2906   if (reuse == MAT_INITIAL_MATRIX) {
2907     /* create a supporting struct and attach it to C for reuse */
2908     ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr);
2909     if (subsize == 1) {
2910       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2911       c->redundant = redund;
2912     } else {
2913       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2914       c->redundant = redund;
2915     }
2916     redund->isrow    = isrow;
2917     redund->iscol    = iscol;
2918     redund->matseq   = matseq;
2919     redund->psubcomm = psubcomm;
2920     redund->Destroy               = (*matredundant)->ops->destroy;
2921     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2922   }
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2928 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2929 {
2930   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2931   PetscErrorCode ierr;
2932   PetscInt       i,*idxb = 0;
2933   PetscScalar    *va,*vb;
2934   Vec            vtmp;
2935 
2936   PetscFunctionBegin;
2937   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2938   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2939   if (idx) {
2940     for (i=0; i<A->rmap->n; i++) {
2941       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2942     }
2943   }
2944 
2945   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2946   if (idx) {
2947     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2948   }
2949   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2950   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2951 
2952   for (i=0; i<A->rmap->n; i++) {
2953     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2954       va[i] = vb[i];
2955       if (idx) idx[i] = a->garray[idxb[i]];
2956     }
2957   }
2958 
2959   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2961   ierr = PetscFree(idxb);CHKERRQ(ierr);
2962   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2968 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2969 {
2970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2971   PetscErrorCode ierr;
2972   PetscInt       i,*idxb = 0;
2973   PetscScalar    *va,*vb;
2974   Vec            vtmp;
2975 
2976   PetscFunctionBegin;
2977   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2978   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2979   if (idx) {
2980     for (i=0; i<A->cmap->n; i++) {
2981       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2982     }
2983   }
2984 
2985   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2986   if (idx) {
2987     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2988   }
2989   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2990   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2991 
2992   for (i=0; i<A->rmap->n; i++) {
2993     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2994       va[i] = vb[i];
2995       if (idx) idx[i] = a->garray[idxb[i]];
2996     }
2997   }
2998 
2999   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3000   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3001   ierr = PetscFree(idxb);CHKERRQ(ierr);
3002   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3008 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3009 {
3010   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3011   PetscInt       n      = A->rmap->n;
3012   PetscInt       cstart = A->cmap->rstart;
3013   PetscInt       *cmap  = mat->garray;
3014   PetscInt       *diagIdx, *offdiagIdx;
3015   Vec            diagV, offdiagV;
3016   PetscScalar    *a, *diagA, *offdiagA;
3017   PetscInt       r;
3018   PetscErrorCode ierr;
3019 
3020   PetscFunctionBegin;
3021   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3022   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3023   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3024   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3025   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3026   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3027   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3028   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3029   for (r = 0; r < n; ++r) {
3030     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3031       a[r]   = diagA[r];
3032       idx[r] = cstart + diagIdx[r];
3033     } else {
3034       a[r]   = offdiagA[r];
3035       idx[r] = cmap[offdiagIdx[r]];
3036     }
3037   }
3038   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3039   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3040   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3041   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3042   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3043   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 
3047 #undef __FUNCT__
3048 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3049 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3050 {
3051   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3052   PetscInt       n      = A->rmap->n;
3053   PetscInt       cstart = A->cmap->rstart;
3054   PetscInt       *cmap  = mat->garray;
3055   PetscInt       *diagIdx, *offdiagIdx;
3056   Vec            diagV, offdiagV;
3057   PetscScalar    *a, *diagA, *offdiagA;
3058   PetscInt       r;
3059   PetscErrorCode ierr;
3060 
3061   PetscFunctionBegin;
3062   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3063   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3064   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3065   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3066   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3067   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3068   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3069   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3070   for (r = 0; r < n; ++r) {
3071     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3072       a[r]   = diagA[r];
3073       idx[r] = cstart + diagIdx[r];
3074     } else {
3075       a[r]   = offdiagA[r];
3076       idx[r] = cmap[offdiagIdx[r]];
3077     }
3078   }
3079   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3080   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3081   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3082   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3083   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3084   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3090 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3091 {
3092   PetscErrorCode ierr;
3093   Mat            *dummy;
3094 
3095   PetscFunctionBegin;
3096   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3097   *newmat = *dummy;
3098   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3104 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3105 {
3106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3107   PetscErrorCode ierr;
3108 
3109   PetscFunctionBegin;
3110   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3116 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3117 {
3118   PetscErrorCode ierr;
3119   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3120 
3121   PetscFunctionBegin;
3122   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3123   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3124   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 /* -------------------------------------------------------------------*/
3130 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3131                                        MatGetRow_MPIAIJ,
3132                                        MatRestoreRow_MPIAIJ,
3133                                        MatMult_MPIAIJ,
3134                                 /* 4*/ MatMultAdd_MPIAIJ,
3135                                        MatMultTranspose_MPIAIJ,
3136                                        MatMultTransposeAdd_MPIAIJ,
3137 #if defined(PETSC_HAVE_PBGL)
3138                                        MatSolve_MPIAIJ,
3139 #else
3140                                        0,
3141 #endif
3142                                        0,
3143                                        0,
3144                                 /*10*/ 0,
3145                                        0,
3146                                        0,
3147                                        MatSOR_MPIAIJ,
3148                                        MatTranspose_MPIAIJ,
3149                                 /*15*/ MatGetInfo_MPIAIJ,
3150                                        MatEqual_MPIAIJ,
3151                                        MatGetDiagonal_MPIAIJ,
3152                                        MatDiagonalScale_MPIAIJ,
3153                                        MatNorm_MPIAIJ,
3154                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3155                                        MatAssemblyEnd_MPIAIJ,
3156                                        MatSetOption_MPIAIJ,
3157                                        MatZeroEntries_MPIAIJ,
3158                                 /*24*/ MatZeroRows_MPIAIJ,
3159                                        0,
3160 #if defined(PETSC_HAVE_PBGL)
3161                                        0,
3162 #else
3163                                        0,
3164 #endif
3165                                        0,
3166                                        0,
3167                                 /*29*/ MatSetUp_MPIAIJ,
3168 #if defined(PETSC_HAVE_PBGL)
3169                                        0,
3170 #else
3171                                        0,
3172 #endif
3173                                        0,
3174                                        0,
3175                                        0,
3176                                 /*34*/ MatDuplicate_MPIAIJ,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                        0,
3181                                 /*39*/ MatAXPY_MPIAIJ,
3182                                        MatGetSubMatrices_MPIAIJ,
3183                                        MatIncreaseOverlap_MPIAIJ,
3184                                        MatGetValues_MPIAIJ,
3185                                        MatCopy_MPIAIJ,
3186                                 /*44*/ MatGetRowMax_MPIAIJ,
3187                                        MatScale_MPIAIJ,
3188                                        0,
3189                                        0,
3190                                        MatZeroRowsColumns_MPIAIJ,
3191                                 /*49*/ MatSetRandom_MPIAIJ,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3197                                        0,
3198                                        MatSetUnfactored_MPIAIJ,
3199                                        MatPermute_MPIAIJ,
3200                                        0,
3201                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3202                                        MatDestroy_MPIAIJ,
3203                                        MatView_MPIAIJ,
3204                                        0,
3205                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3206                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3207                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3208                                        0,
3209                                        0,
3210                                        0,
3211                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3212                                        MatGetRowMinAbs_MPIAIJ,
3213                                        0,
3214                                        MatSetColoring_MPIAIJ,
3215                                        0,
3216                                        MatSetValuesAdifor_MPIAIJ,
3217                                 /*75*/ MatFDColoringApply_AIJ,
3218                                        0,
3219                                        0,
3220                                        0,
3221                                        MatFindZeroDiagonals_MPIAIJ,
3222                                 /*80*/ 0,
3223                                        0,
3224                                        0,
3225                                 /*83*/ MatLoad_MPIAIJ,
3226                                        0,
3227                                        0,
3228                                        0,
3229                                        0,
3230                                        0,
3231                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3232                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3233                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3234                                        MatPtAP_MPIAIJ_MPIAIJ,
3235                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3236                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3237                                        0,
3238                                        0,
3239                                        0,
3240                                        0,
3241                                 /*99*/ 0,
3242                                        0,
3243                                        0,
3244                                        MatConjugate_MPIAIJ,
3245                                        0,
3246                                 /*104*/MatSetValuesRow_MPIAIJ,
3247                                        MatRealPart_MPIAIJ,
3248                                        MatImaginaryPart_MPIAIJ,
3249                                        0,
3250                                        0,
3251                                 /*109*/0,
3252                                        MatGetRedundantMatrix_MPIAIJ,
3253                                        MatGetRowMin_MPIAIJ,
3254                                        0,
3255                                        0,
3256                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3257                                        0,
3258                                        0,
3259                                        0,
3260                                        0,
3261                                 /*119*/0,
3262                                        0,
3263                                        0,
3264                                        0,
3265                                        MatGetMultiProcBlock_MPIAIJ,
3266                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3267                                        MatGetColumnNorms_MPIAIJ,
3268                                        MatInvertBlockDiagonal_MPIAIJ,
3269                                        0,
3270                                        MatGetSubMatricesParallel_MPIAIJ,
3271                                 /*129*/0,
3272                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3273                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3274                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3275                                        0,
3276                                 /*134*/0,
3277                                        0,
3278                                        0,
3279                                        0,
3280                                        0,
3281                                 /*139*/0,
3282                                        0,
3283                                        0,
3284                                        MatFDColoringSetUp_MPIXAIJ
3285 };
3286 
3287 /* ----------------------------------------------------------------------------------------*/
3288 
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3291 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3292 {
3293   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3294   PetscErrorCode ierr;
3295 
3296   PetscFunctionBegin;
3297   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3298   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 #undef __FUNCT__
3303 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3304 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3305 {
3306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3307   PetscErrorCode ierr;
3308 
3309   PetscFunctionBegin;
3310   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3311   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3317 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3318 {
3319   Mat_MPIAIJ     *b;
3320   PetscErrorCode ierr;
3321 
3322   PetscFunctionBegin;
3323   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3324   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3325   b = (Mat_MPIAIJ*)B->data;
3326 
3327   if (!B->preallocated) {
3328     /* Explicitly create 2 MATSEQAIJ matrices. */
3329     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3330     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3331     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3332     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3333     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3334     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3335     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3336     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3337     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3338     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3339   }
3340 
3341   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3342   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3343   B->preallocated = PETSC_TRUE;
3344   PetscFunctionReturn(0);
3345 }
3346 
3347 #undef __FUNCT__
3348 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3349 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3350 {
3351   Mat            mat;
3352   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3353   PetscErrorCode ierr;
3354 
3355   PetscFunctionBegin;
3356   *newmat = 0;
3357   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3358   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3359   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3360   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3361   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3362   a       = (Mat_MPIAIJ*)mat->data;
3363 
3364   mat->factortype   = matin->factortype;
3365   mat->rmap->bs     = matin->rmap->bs;
3366   mat->cmap->bs     = matin->cmap->bs;
3367   mat->assembled    = PETSC_TRUE;
3368   mat->insertmode   = NOT_SET_VALUES;
3369   mat->preallocated = PETSC_TRUE;
3370 
3371   a->size         = oldmat->size;
3372   a->rank         = oldmat->rank;
3373   a->donotstash   = oldmat->donotstash;
3374   a->roworiented  = oldmat->roworiented;
3375   a->rowindices   = 0;
3376   a->rowvalues    = 0;
3377   a->getrowactive = PETSC_FALSE;
3378 
3379   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3380   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3381 
3382   if (oldmat->colmap) {
3383 #if defined(PETSC_USE_CTABLE)
3384     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3385 #else
3386     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3387     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3388     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3389 #endif
3390   } else a->colmap = 0;
3391   if (oldmat->garray) {
3392     PetscInt len;
3393     len  = oldmat->B->cmap->n;
3394     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3395     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3396     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3397   } else a->garray = 0;
3398 
3399   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3400   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3401   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3402   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3403   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3404   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3405   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3406   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3407   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3408   *newmat = mat;
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 
3413 
3414 #undef __FUNCT__
3415 #define __FUNCT__ "MatLoad_MPIAIJ"
3416 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3417 {
3418   PetscScalar    *vals,*svals;
3419   MPI_Comm       comm;
3420   PetscErrorCode ierr;
3421   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3422   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3423   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3424   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3425   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3426   int            fd;
3427   PetscInt       bs = 1;
3428 
3429   PetscFunctionBegin;
3430   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3431   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3432   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3433   if (!rank) {
3434     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3435     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3436     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3437   }
3438 
3439   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3440   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3441   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3442 
3443   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3444 
3445   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3446   M    = header[1]; N = header[2];
3447   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3448   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3449   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3450 
3451   /* If global sizes are set, check if they are consistent with that given in the file */
3452   if (sizesset) {
3453     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3454   }
3455   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3456   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3457 
3458   /* determine ownership of all (block) rows */
3459   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3460   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3461   else m = newMat->rmap->n; /* Set by user */
3462 
3463   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3464   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3465 
3466   /* First process needs enough room for process with most rows */
3467   if (!rank) {
3468     mmax = rowners[1];
3469     for (i=2; i<=size; i++) {
3470       mmax = PetscMax(mmax, rowners[i]);
3471     }
3472   } else mmax = -1;             /* unused, but compilers complain */
3473 
3474   rowners[0] = 0;
3475   for (i=2; i<=size; i++) {
3476     rowners[i] += rowners[i-1];
3477   }
3478   rstart = rowners[rank];
3479   rend   = rowners[rank+1];
3480 
3481   /* distribute row lengths to all processors */
3482   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3483   if (!rank) {
3484     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3485     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3486     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3487     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3488     for (j=0; j<m; j++) {
3489       procsnz[0] += ourlens[j];
3490     }
3491     for (i=1; i<size; i++) {
3492       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3493       /* calculate the number of nonzeros on each processor */
3494       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3495         procsnz[i] += rowlengths[j];
3496       }
3497       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3498     }
3499     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3500   } else {
3501     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3502   }
3503 
3504   if (!rank) {
3505     /* determine max buffer needed and allocate it */
3506     maxnz = 0;
3507     for (i=0; i<size; i++) {
3508       maxnz = PetscMax(maxnz,procsnz[i]);
3509     }
3510     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3511 
3512     /* read in my part of the matrix column indices  */
3513     nz   = procsnz[0];
3514     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3515     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3516 
3517     /* read in every one elses and ship off */
3518     for (i=1; i<size; i++) {
3519       nz   = procsnz[i];
3520       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3521       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3522     }
3523     ierr = PetscFree(cols);CHKERRQ(ierr);
3524   } else {
3525     /* determine buffer space needed for message */
3526     nz = 0;
3527     for (i=0; i<m; i++) {
3528       nz += ourlens[i];
3529     }
3530     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3531 
3532     /* receive message of column indices*/
3533     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3534   }
3535 
3536   /* determine column ownership if matrix is not square */
3537   if (N != M) {
3538     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3539     else n = newMat->cmap->n;
3540     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3541     cstart = cend - n;
3542   } else {
3543     cstart = rstart;
3544     cend   = rend;
3545     n      = cend - cstart;
3546   }
3547 
3548   /* loop over local rows, determining number of off diagonal entries */
3549   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3550   jj   = 0;
3551   for (i=0; i<m; i++) {
3552     for (j=0; j<ourlens[i]; j++) {
3553       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3554       jj++;
3555     }
3556   }
3557 
3558   for (i=0; i<m; i++) {
3559     ourlens[i] -= offlens[i];
3560   }
3561   if (!sizesset) {
3562     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3563   }
3564 
3565   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3566 
3567   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3568 
3569   for (i=0; i<m; i++) {
3570     ourlens[i] += offlens[i];
3571   }
3572 
3573   if (!rank) {
3574     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3575 
3576     /* read in my part of the matrix numerical values  */
3577     nz   = procsnz[0];
3578     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3579 
3580     /* insert into matrix */
3581     jj      = rstart;
3582     smycols = mycols;
3583     svals   = vals;
3584     for (i=0; i<m; i++) {
3585       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3586       smycols += ourlens[i];
3587       svals   += ourlens[i];
3588       jj++;
3589     }
3590 
3591     /* read in other processors and ship out */
3592     for (i=1; i<size; i++) {
3593       nz   = procsnz[i];
3594       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3595       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3596     }
3597     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3598   } else {
3599     /* receive numeric values */
3600     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3601 
3602     /* receive message of values*/
3603     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3604 
3605     /* insert into matrix */
3606     jj      = rstart;
3607     smycols = mycols;
3608     svals   = vals;
3609     for (i=0; i<m; i++) {
3610       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3611       smycols += ourlens[i];
3612       svals   += ourlens[i];
3613       jj++;
3614     }
3615   }
3616   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3617   ierr = PetscFree(vals);CHKERRQ(ierr);
3618   ierr = PetscFree(mycols);CHKERRQ(ierr);
3619   ierr = PetscFree(rowners);CHKERRQ(ierr);
3620   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   PetscFunctionReturn(0);
3623 }
3624 
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3627 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3628 {
3629   PetscErrorCode ierr;
3630   IS             iscol_local;
3631   PetscInt       csize;
3632 
3633   PetscFunctionBegin;
3634   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3635   if (call == MAT_REUSE_MATRIX) {
3636     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3637     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3638   } else {
3639     PetscInt cbs;
3640     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3641     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3642     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3643   }
3644   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3645   if (call == MAT_INITIAL_MATRIX) {
3646     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3647     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3648   }
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3653 #undef __FUNCT__
3654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3655 /*
3656     Not great since it makes two copies of the submatrix, first an SeqAIJ
3657   in local and then by concatenating the local matrices the end result.
3658   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3659 
3660   Note: This requires a sequential iscol with all indices.
3661 */
3662 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3663 {
3664   PetscErrorCode ierr;
3665   PetscMPIInt    rank,size;
3666   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3667   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3668   PetscBool      allcolumns, colflag;
3669   Mat            M,Mreuse;
3670   MatScalar      *vwork,*aa;
3671   MPI_Comm       comm;
3672   Mat_SeqAIJ     *aij;
3673 
3674   PetscFunctionBegin;
3675   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3678 
3679   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3680   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3681   if (colflag && ncol == mat->cmap->N) {
3682     allcolumns = PETSC_TRUE;
3683   } else {
3684     allcolumns = PETSC_FALSE;
3685   }
3686   if (call ==  MAT_REUSE_MATRIX) {
3687     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3688     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3689     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3690   } else {
3691     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3692   }
3693 
3694   /*
3695       m - number of local rows
3696       n - number of columns (same on all processors)
3697       rstart - first row in new global matrix generated
3698   */
3699   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3700   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3701   if (call == MAT_INITIAL_MATRIX) {
3702     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3703     ii  = aij->i;
3704     jj  = aij->j;
3705 
3706     /*
3707         Determine the number of non-zeros in the diagonal and off-diagonal
3708         portions of the matrix in order to do correct preallocation
3709     */
3710 
3711     /* first get start and end of "diagonal" columns */
3712     if (csize == PETSC_DECIDE) {
3713       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3714       if (mglobal == n) { /* square matrix */
3715         nlocal = m;
3716       } else {
3717         nlocal = n/size + ((n % size) > rank);
3718       }
3719     } else {
3720       nlocal = csize;
3721     }
3722     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3723     rstart = rend - nlocal;
3724     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3725 
3726     /* next, compute all the lengths */
3727     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3728     olens = dlens + m;
3729     for (i=0; i<m; i++) {
3730       jend = ii[i+1] - ii[i];
3731       olen = 0;
3732       dlen = 0;
3733       for (j=0; j<jend; j++) {
3734         if (*jj < rstart || *jj >= rend) olen++;
3735         else dlen++;
3736         jj++;
3737       }
3738       olens[i] = olen;
3739       dlens[i] = dlen;
3740     }
3741     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3742     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3743     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3744     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3745     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3746     ierr = PetscFree(dlens);CHKERRQ(ierr);
3747   } else {
3748     PetscInt ml,nl;
3749 
3750     M    = *newmat;
3751     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3752     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3753     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3754     /*
3755          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3756        rather than the slower MatSetValues().
3757     */
3758     M->was_assembled = PETSC_TRUE;
3759     M->assembled     = PETSC_FALSE;
3760   }
3761   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3762   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3763   ii   = aij->i;
3764   jj   = aij->j;
3765   aa   = aij->a;
3766   for (i=0; i<m; i++) {
3767     row   = rstart + i;
3768     nz    = ii[i+1] - ii[i];
3769     cwork = jj;     jj += nz;
3770     vwork = aa;     aa += nz;
3771     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3772   }
3773 
3774   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   *newmat = M;
3777 
3778   /* save submatrix used in processor for next request */
3779   if (call ==  MAT_INITIAL_MATRIX) {
3780     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3781     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3782   }
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3788 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3789 {
3790   PetscInt       m,cstart, cend,j,nnz,i,d;
3791   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3792   const PetscInt *JJ;
3793   PetscScalar    *values;
3794   PetscErrorCode ierr;
3795 
3796   PetscFunctionBegin;
3797   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3798 
3799   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3800   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3801   m      = B->rmap->n;
3802   cstart = B->cmap->rstart;
3803   cend   = B->cmap->rend;
3804   rstart = B->rmap->rstart;
3805 
3806   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3807 
3808 #if defined(PETSC_USE_DEBUGGING)
3809   for (i=0; i<m; i++) {
3810     nnz = Ii[i+1]- Ii[i];
3811     JJ  = J + Ii[i];
3812     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3813     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3814     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3815   }
3816 #endif
3817 
3818   for (i=0; i<m; i++) {
3819     nnz     = Ii[i+1]- Ii[i];
3820     JJ      = J + Ii[i];
3821     nnz_max = PetscMax(nnz_max,nnz);
3822     d       = 0;
3823     for (j=0; j<nnz; j++) {
3824       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3825     }
3826     d_nnz[i] = d;
3827     o_nnz[i] = nnz - d;
3828   }
3829   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3830   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3831 
3832   if (v) values = (PetscScalar*)v;
3833   else {
3834     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3835     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3836   }
3837 
3838   for (i=0; i<m; i++) {
3839     ii   = i + rstart;
3840     nnz  = Ii[i+1]- Ii[i];
3841     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3842   }
3843   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845 
3846   if (!v) {
3847     ierr = PetscFree(values);CHKERRQ(ierr);
3848   }
3849   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective on MPI_Comm
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3875 
3876        The format which is used for the sparse matrix input, is equivalent to a
3877     row-major ordering.. i.e for the following matrix, the input data expected is
3878     as shown:
3879 
3880         1 0 0
3881         2 0 3     P0
3882        -------
3883         4 5 6     P1
3884 
3885      Process0 [P0]: rows_owned=[0,1]
3886         i =  {0,1,3}  [size = nrow+1  = 2+1]
3887         j =  {0,0,2}  [size = nz = 6]
3888         v =  {1,2,3}  [size = nz = 6]
3889 
3890      Process1 [P1]: rows_owned=[2]
3891         i =  {0,3}    [size = nrow+1  = 1+1]
3892         j =  {0,1,2}  [size = nz = 6]
3893         v =  {4,5,6}  [size = nz = 6]
3894 
3895 .keywords: matrix, aij, compressed row, sparse, parallel
3896 
3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3898           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3899 @*/
3900 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3901 {
3902   PetscErrorCode ierr;
3903 
3904   PetscFunctionBegin;
3905   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 #undef __FUNCT__
3910 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3911 /*@C
3912    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3913    (the default parallel PETSc format).  For good matrix assembly performance
3914    the user should preallocate the matrix storage by setting the parameters
3915    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3916    performance can be increased by more than a factor of 50.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  A - the matrix
3922 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3923            (same value is used for all local rows)
3924 .  d_nnz - array containing the number of nonzeros in the various rows of the
3925            DIAGONAL portion of the local submatrix (possibly different for each row)
3926            or NULL, if d_nz is used to specify the nonzero structure.
3927            The size of this array is equal to the number of local rows, i.e 'm'.
3928            For matrices that will be factored, you must leave room for (and set)
3929            the diagonal entry even if it is zero.
3930 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3931            submatrix (same value is used for all local rows).
3932 -  o_nnz - array containing the number of nonzeros in the various rows of the
3933            OFF-DIAGONAL portion of the local submatrix (possibly different for
3934            each row) or NULL, if o_nz is used to specify the nonzero
3935            structure. The size of this array is equal to the number
3936            of local rows, i.e 'm'.
3937 
3938    If the *_nnz parameter is given then the *_nz parameter is ignored
3939 
3940    The AIJ format (also called the Yale sparse matrix format or
3941    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3942    storage.  The stored row and column indices begin with zero.
3943    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3944 
3945    The parallel matrix is partitioned such that the first m0 rows belong to
3946    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3948 
3949    The DIAGONAL portion of the local submatrix of a processor can be defined
3950    as the submatrix which is obtained by extraction the part corresponding to
3951    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952    first row that belongs to the processor, r2 is the last row belonging to
3953    the this processor, and c1-c2 is range of indices of the local part of a
3954    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3955    common case of a square matrix, the row and column ranges are the same and
3956    the DIAGONAL part is also square. The remaining portion of the local
3957    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3958 
3959    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3960 
3961    You can call MatGetInfo() to get information on how effective the preallocation was;
3962    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3963    You can also run with the option -info and look for messages with the string
3964    malloc in them to see if additional memory allocation was needed.
3965 
3966    Example usage:
3967 
3968    Consider the following 8x8 matrix with 34 non-zero values, that is
3969    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3970    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3971    as follows:
3972 
3973 .vb
3974             1  2  0  |  0  3  0  |  0  4
3975     Proc0   0  5  6  |  7  0  0  |  8  0
3976             9  0 10  | 11  0  0  | 12  0
3977     -------------------------------------
3978            13  0 14  | 15 16 17  |  0  0
3979     Proc1   0 18  0  | 19 20 21  |  0  0
3980             0  0  0  | 22 23  0  | 24  0
3981     -------------------------------------
3982     Proc2  25 26 27  |  0  0 28  | 29  0
3983            30  0  0  | 31 32 33  |  0 34
3984 .ve
3985 
3986    This can be represented as a collection of submatrices as:
3987 
3988 .vb
3989       A B C
3990       D E F
3991       G H I
3992 .ve
3993 
3994    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995    owned by proc1, G,H,I are owned by proc2.
3996 
3997    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'M','N' parameters are 8,8, and have the same values on all procs.
4000 
4001    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4004    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4005    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4006    matrix, ans [DF] as another SeqAIJ matrix.
4007 
4008    When d_nz, o_nz parameters are specified, d_nz storage elements are
4009    allocated for every row of the local diagonal submatrix, and o_nz
4010    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4011    One way to choose d_nz and o_nz is to use the max nonzerors per local
4012    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4013    In this case, the values of d_nz,o_nz are:
4014 .vb
4015      proc0 : dnz = 2, o_nz = 2
4016      proc1 : dnz = 3, o_nz = 2
4017      proc2 : dnz = 1, o_nz = 4
4018 .ve
4019    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021    for proc3. i.e we are using 12+15+10=37 storage locations to store
4022    34 values.
4023 
4024    When d_nnz, o_nnz parameters are specified, the storage is specified
4025    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026    In the above case the values for d_nnz,o_nnz are:
4027 .vb
4028      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031 .ve
4032    Here the space allocated is sum of all the above values i.e 34, and
4033    hence pre-allocation is perfect.
4034 
4035    Level: intermediate
4036 
4037 .keywords: matrix, aij, compressed row, sparse, parallel
4038 
4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4040           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4041 @*/
4042 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4043 {
4044   PetscErrorCode ierr;
4045 
4046   PetscFunctionBegin;
4047   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4048   PetscValidType(B,1);
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 #undef __FUNCT__
4054 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4055 /*@
4056      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4057          CSR format the local rows.
4058 
4059    Collective on MPI_Comm
4060 
4061    Input Parameters:
4062 +  comm - MPI communicator
4063 .  m - number of local rows (Cannot be PETSC_DECIDE)
4064 .  n - This value should be the same as the local size used in creating the
4065        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4066        calculated if N is given) For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4069 .   i - row indices
4070 .   j - column indices
4071 -   a - matrix values
4072 
4073    Output Parameter:
4074 .   mat - the matrix
4075 
4076    Level: intermediate
4077 
4078    Notes:
4079        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4080      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4081      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4082 
4083        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4084 
4085        The format which is used for the sparse matrix input, is equivalent to a
4086     row-major ordering.. i.e for the following matrix, the input data expected is
4087     as shown:
4088 
4089         1 0 0
4090         2 0 3     P0
4091        -------
4092         4 5 6     P1
4093 
4094      Process0 [P0]: rows_owned=[0,1]
4095         i =  {0,1,3}  [size = nrow+1  = 2+1]
4096         j =  {0,0,2}  [size = nz = 6]
4097         v =  {1,2,3}  [size = nz = 6]
4098 
4099      Process1 [P1]: rows_owned=[2]
4100         i =  {0,3}    [size = nrow+1  = 1+1]
4101         j =  {0,1,2}  [size = nz = 6]
4102         v =  {4,5,6}  [size = nz = 6]
4103 
4104 .keywords: matrix, aij, compressed row, sparse, parallel
4105 
4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4107           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4108 @*/
4109 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4110 {
4111   PetscErrorCode ierr;
4112 
4113   PetscFunctionBegin;
4114   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4115   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4116   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4117   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4118   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4119   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4120   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatCreateAIJ"
4126 /*@C
4127    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4128    (the default parallel PETSc format).  For good matrix assembly performance
4129    the user should preallocate the matrix storage by setting the parameters
4130    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4131    performance can be increased by more than a factor of 50.
4132 
4133    Collective on MPI_Comm
4134 
4135    Input Parameters:
4136 +  comm - MPI communicator
4137 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4138            This value should be the same as the local size used in creating the
4139            y vector for the matrix-vector product y = Ax.
4140 .  n - This value should be the same as the local size used in creating the
4141        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4142        calculated if N is given) For square matrices n is almost always m.
4143 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4144 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4145 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4146            (same value is used for all local rows)
4147 .  d_nnz - array containing the number of nonzeros in the various rows of the
4148            DIAGONAL portion of the local submatrix (possibly different for each row)
4149            or NULL, if d_nz is used to specify the nonzero structure.
4150            The size of this array is equal to the number of local rows, i.e 'm'.
4151 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4152            submatrix (same value is used for all local rows).
4153 -  o_nnz - array containing the number of nonzeros in the various rows of the
4154            OFF-DIAGONAL portion of the local submatrix (possibly different for
4155            each row) or NULL, if o_nz is used to specify the nonzero
4156            structure. The size of this array is equal to the number
4157            of local rows, i.e 'm'.
4158 
4159    Output Parameter:
4160 .  A - the matrix
4161 
4162    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4163    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4164    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4165 
4166    Notes:
4167    If the *_nnz parameter is given then the *_nz parameter is ignored
4168 
4169    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4170    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4171    storage requirements for this matrix.
4172 
4173    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4174    processor than it must be used on all processors that share the object for
4175    that argument.
4176 
4177    The user MUST specify either the local or global matrix dimensions
4178    (possibly both).
4179 
4180    The parallel matrix is partitioned across processors such that the
4181    first m0 rows belong to process 0, the next m1 rows belong to
4182    process 1, the next m2 rows belong to process 2 etc.. where
4183    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4184    values corresponding to [m x N] submatrix.
4185 
4186    The columns are logically partitioned with the n0 columns belonging
4187    to 0th partition, the next n1 columns belonging to the next
4188    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4189 
4190    The DIAGONAL portion of the local submatrix on any given processor
4191    is the submatrix corresponding to the rows and columns m,n
4192    corresponding to the given processor. i.e diagonal matrix on
4193    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4194    etc. The remaining portion of the local submatrix [m x (N-n)]
4195    constitute the OFF-DIAGONAL portion. The example below better
4196    illustrates this concept.
4197 
4198    For a square global matrix we define each processor's diagonal portion
4199    to be its local rows and the corresponding columns (a square submatrix);
4200    each processor's off-diagonal portion encompasses the remainder of the
4201    local matrix (a rectangular submatrix).
4202 
4203    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4204 
4205    When calling this routine with a single process communicator, a matrix of
4206    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4207    type of communicator, use the construction mechanism:
4208      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4209 
4210    By default, this format uses inodes (identical nodes) when possible.
4211    We search for consecutive rows with the same nonzero structure, thereby
4212    reusing matrix information to achieve increased efficiency.
4213 
4214    Options Database Keys:
4215 +  -mat_no_inode  - Do not use inodes
4216 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4217 -  -mat_aij_oneindex - Internally use indexing starting at 1
4218         rather than 0.  Note that when calling MatSetValues(),
4219         the user still MUST index entries starting at 0!
4220 
4221 
4222    Example usage:
4223 
4224    Consider the following 8x8 matrix with 34 non-zero values, that is
4225    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4226    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4227    as follows:
4228 
4229 .vb
4230             1  2  0  |  0  3  0  |  0  4
4231     Proc0   0  5  6  |  7  0  0  |  8  0
4232             9  0 10  | 11  0  0  | 12  0
4233     -------------------------------------
4234            13  0 14  | 15 16 17  |  0  0
4235     Proc1   0 18  0  | 19 20 21  |  0  0
4236             0  0  0  | 22 23  0  | 24  0
4237     -------------------------------------
4238     Proc2  25 26 27  |  0  0 28  | 29  0
4239            30  0  0  | 31 32 33  |  0 34
4240 .ve
4241 
4242    This can be represented as a collection of submatrices as:
4243 
4244 .vb
4245       A B C
4246       D E F
4247       G H I
4248 .ve
4249 
4250    Where the submatrices A,B,C are owned by proc0, D,E,F are
4251    owned by proc1, G,H,I are owned by proc2.
4252 
4253    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4254    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'M','N' parameters are 8,8, and have the same values on all procs.
4256 
4257    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4258    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4259    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4260    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4261    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4262    matrix, ans [DF] as another SeqAIJ matrix.
4263 
4264    When d_nz, o_nz parameters are specified, d_nz storage elements are
4265    allocated for every row of the local diagonal submatrix, and o_nz
4266    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4267    One way to choose d_nz and o_nz is to use the max nonzerors per local
4268    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4269    In this case, the values of d_nz,o_nz are:
4270 .vb
4271      proc0 : dnz = 2, o_nz = 2
4272      proc1 : dnz = 3, o_nz = 2
4273      proc2 : dnz = 1, o_nz = 4
4274 .ve
4275    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4276    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4277    for proc3. i.e we are using 12+15+10=37 storage locations to store
4278    34 values.
4279 
4280    When d_nnz, o_nnz parameters are specified, the storage is specified
4281    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4282    In the above case the values for d_nnz,o_nnz are:
4283 .vb
4284      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4285      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4286      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4287 .ve
4288    Here the space allocated is sum of all the above values i.e 34, and
4289    hence pre-allocation is perfect.
4290 
4291    Level: intermediate
4292 
4293 .keywords: matrix, aij, compressed row, sparse, parallel
4294 
4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4296           MPIAIJ, MatCreateMPIAIJWithArrays()
4297 @*/
4298 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4299 {
4300   PetscErrorCode ierr;
4301   PetscMPIInt    size;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4305   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4306   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4307   if (size > 1) {
4308     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4309     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4310   } else {
4311     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4312     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4313   }
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4319 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4320 {
4321   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4322 
4323   PetscFunctionBegin;
4324   *Ad     = a->A;
4325   *Ao     = a->B;
4326   *colmap = a->garray;
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 #undef __FUNCT__
4331 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4332 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4333 {
4334   PetscErrorCode ierr;
4335   PetscInt       i;
4336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4337 
4338   PetscFunctionBegin;
4339   if (coloring->ctype == IS_COLORING_GLOBAL) {
4340     ISColoringValue *allcolors,*colors;
4341     ISColoring      ocoloring;
4342 
4343     /* set coloring for diagonal portion */
4344     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4345 
4346     /* set coloring for off-diagonal portion */
4347     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4348     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4349     for (i=0; i<a->B->cmap->n; i++) {
4350       colors[i] = allcolors[a->garray[i]];
4351     }
4352     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4353     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4354     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4355     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4356   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4357     ISColoringValue *colors;
4358     PetscInt        *larray;
4359     ISColoring      ocoloring;
4360 
4361     /* set coloring for diagonal portion */
4362     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4363     for (i=0; i<a->A->cmap->n; i++) {
4364       larray[i] = i + A->cmap->rstart;
4365     }
4366     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4367     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4368     for (i=0; i<a->A->cmap->n; i++) {
4369       colors[i] = coloring->colors[larray[i]];
4370     }
4371     ierr = PetscFree(larray);CHKERRQ(ierr);
4372     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4373     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4374     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4375 
4376     /* set coloring for off-diagonal portion */
4377     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4378     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4379     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4380     for (i=0; i<a->B->cmap->n; i++) {
4381       colors[i] = coloring->colors[larray[i]];
4382     }
4383     ierr = PetscFree(larray);CHKERRQ(ierr);
4384     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4385     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4386     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4387   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #undef __FUNCT__
4392 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4393 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4394 {
4395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4396   PetscErrorCode ierr;
4397 
4398   PetscFunctionBegin;
4399   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4400   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4406 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4407 {
4408   PetscErrorCode ierr;
4409   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4410   PetscInt       *indx;
4411 
4412   PetscFunctionBegin;
4413   /* This routine will ONLY return MPIAIJ type matrix */
4414   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4415   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4416   if (n == PETSC_DECIDE) {
4417     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4418   }
4419   /* Check sum(n) = N */
4420   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4421   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4422 
4423   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4424   rstart -= m;
4425 
4426   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4427   for (i=0; i<m; i++) {
4428     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4429     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4430     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4431   }
4432 
4433   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4434   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4435   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4436   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4437   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4438   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #undef __FUNCT__
4443 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4444 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4445 {
4446   PetscErrorCode ierr;
4447   PetscInt       m,N,i,rstart,nnz,Ii;
4448   PetscInt       *indx;
4449   PetscScalar    *values;
4450 
4451   PetscFunctionBegin;
4452   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4453   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4454   for (i=0; i<m; i++) {
4455     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4456     Ii   = i + rstart;
4457     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4458     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4459   }
4460   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4461   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462   PetscFunctionReturn(0);
4463 }
4464 
4465 #undef __FUNCT__
4466 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4467 /*@
4468       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4469                  matrices from each processor
4470 
4471     Collective on MPI_Comm
4472 
4473    Input Parameters:
4474 +    comm - the communicators the parallel matrix will live on
4475 .    inmat - the input sequential matrices
4476 .    n - number of local columns (or PETSC_DECIDE)
4477 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4478 
4479    Output Parameter:
4480 .    outmat - the parallel matrix generated
4481 
4482     Level: advanced
4483 
4484    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4485 
4486 @*/
4487 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4488 {
4489   PetscErrorCode ierr;
4490   PetscMPIInt    size;
4491 
4492   PetscFunctionBegin;
4493   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4494   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4495   if (size == 1) {
4496     if (scall == MAT_INITIAL_MATRIX) {
4497       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4498     } else {
4499       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4500     }
4501   } else {
4502     if (scall == MAT_INITIAL_MATRIX) {
4503       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4504     }
4505     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4506   }
4507   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4508   PetscFunctionReturn(0);
4509 }
4510 
4511 #undef __FUNCT__
4512 #define __FUNCT__ "MatFileSplit"
4513 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4514 {
4515   PetscErrorCode    ierr;
4516   PetscMPIInt       rank;
4517   PetscInt          m,N,i,rstart,nnz;
4518   size_t            len;
4519   const PetscInt    *indx;
4520   PetscViewer       out;
4521   char              *name;
4522   Mat               B;
4523   const PetscScalar *values;
4524 
4525   PetscFunctionBegin;
4526   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4527   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4528   /* Should this be the type of the diagonal block of A? */
4529   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4530   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4531   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4532   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4533   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4534   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4535   for (i=0; i<m; i++) {
4536     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4537     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4538     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4539   }
4540   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4541   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4542 
4543   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4544   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4545   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4546   sprintf(name,"%s.%d",outfile,rank);
4547   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4548   ierr = PetscFree(name);CHKERRQ(ierr);
4549   ierr = MatView(B,out);CHKERRQ(ierr);
4550   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4551   ierr = MatDestroy(&B);CHKERRQ(ierr);
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4556 #undef __FUNCT__
4557 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4558 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4559 {
4560   PetscErrorCode      ierr;
4561   Mat_Merge_SeqsToMPI *merge;
4562   PetscContainer      container;
4563 
4564   PetscFunctionBegin;
4565   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4566   if (container) {
4567     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4579     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4580     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4581     ierr = PetscFree(merge);CHKERRQ(ierr);
4582     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4583   }
4584   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 #include <../src/mat/utils/freespace.h>
4589 #include <petscbt.h>
4590 
4591 #undef __FUNCT__
4592 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4593 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4594 {
4595   PetscErrorCode      ierr;
4596   MPI_Comm            comm;
4597   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4598   PetscMPIInt         size,rank,taga,*len_s;
4599   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4600   PetscInt            proc,m;
4601   PetscInt            **buf_ri,**buf_rj;
4602   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4603   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4604   MPI_Request         *s_waits,*r_waits;
4605   MPI_Status          *status;
4606   MatScalar           *aa=a->a;
4607   MatScalar           **abuf_r,*ba_i;
4608   Mat_Merge_SeqsToMPI *merge;
4609   PetscContainer      container;
4610 
4611   PetscFunctionBegin;
4612   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4613   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4614 
4615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4616   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4617 
4618   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4619   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4620 
4621   bi     = merge->bi;
4622   bj     = merge->bj;
4623   buf_ri = merge->buf_ri;
4624   buf_rj = merge->buf_rj;
4625 
4626   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4627   owners = merge->rowmap->range;
4628   len_s  = merge->len_s;
4629 
4630   /* send and recv matrix values */
4631   /*-----------------------------*/
4632   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4633   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4634 
4635   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4636   for (proc=0,k=0; proc<size; proc++) {
4637     if (!len_s[proc]) continue;
4638     i    = owners[proc];
4639     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4640     k++;
4641   }
4642 
4643   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4644   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4645   ierr = PetscFree(status);CHKERRQ(ierr);
4646 
4647   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4648   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4649 
4650   /* insert mat values of mpimat */
4651   /*----------------------------*/
4652   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4653   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4654 
4655   for (k=0; k<merge->nrecv; k++) {
4656     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4657     nrows       = *(buf_ri_k[k]);
4658     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4659     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4660   }
4661 
4662   /* set values of ba */
4663   m = merge->rowmap->n;
4664   for (i=0; i<m; i++) {
4665     arow = owners[rank] + i;
4666     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4667     bnzi = bi[i+1] - bi[i];
4668     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4669 
4670     /* add local non-zero vals of this proc's seqmat into ba */
4671     anzi   = ai[arow+1] - ai[arow];
4672     aj     = a->j + ai[arow];
4673     aa     = a->a + ai[arow];
4674     nextaj = 0;
4675     for (j=0; nextaj<anzi; j++) {
4676       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4677         ba_i[j] += aa[nextaj++];
4678       }
4679     }
4680 
4681     /* add received vals into ba */
4682     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4683       /* i-th row */
4684       if (i == *nextrow[k]) {
4685         anzi   = *(nextai[k]+1) - *nextai[k];
4686         aj     = buf_rj[k] + *(nextai[k]);
4687         aa     = abuf_r[k] + *(nextai[k]);
4688         nextaj = 0;
4689         for (j=0; nextaj<anzi; j++) {
4690           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4691             ba_i[j] += aa[nextaj++];
4692           }
4693         }
4694         nextrow[k]++; nextai[k]++;
4695       }
4696     }
4697     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4698   }
4699   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4700   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701 
4702   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4703   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4704   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4705   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4706   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4707   PetscFunctionReturn(0);
4708 }
4709 
4710 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4711 
4712 #undef __FUNCT__
4713 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4714 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4715 {
4716   PetscErrorCode      ierr;
4717   Mat                 B_mpi;
4718   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4719   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4720   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4721   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4722   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4723   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4724   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4725   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4726   MPI_Status          *status;
4727   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4728   PetscBT             lnkbt;
4729   Mat_Merge_SeqsToMPI *merge;
4730   PetscContainer      container;
4731 
4732   PetscFunctionBegin;
4733   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4734 
4735   /* make sure it is a PETSc comm */
4736   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4737   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4738   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4739 
4740   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4741   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4742 
4743   /* determine row ownership */
4744   /*---------------------------------------------------------*/
4745   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4746   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4747   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4748   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4749   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4750   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4751   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4752 
4753   m      = merge->rowmap->n;
4754   owners = merge->rowmap->range;
4755 
4756   /* determine the number of messages to send, their lengths */
4757   /*---------------------------------------------------------*/
4758   len_s = merge->len_s;
4759 
4760   len          = 0; /* length of buf_si[] */
4761   merge->nsend = 0;
4762   for (proc=0; proc<size; proc++) {
4763     len_si[proc] = 0;
4764     if (proc == rank) {
4765       len_s[proc] = 0;
4766     } else {
4767       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4768       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4769     }
4770     if (len_s[proc]) {
4771       merge->nsend++;
4772       nrows = 0;
4773       for (i=owners[proc]; i<owners[proc+1]; i++) {
4774         if (ai[i+1] > ai[i]) nrows++;
4775       }
4776       len_si[proc] = 2*(nrows+1);
4777       len         += len_si[proc];
4778     }
4779   }
4780 
4781   /* determine the number and length of messages to receive for ij-structure */
4782   /*-------------------------------------------------------------------------*/
4783   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4784   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4785 
4786   /* post the Irecv of j-structure */
4787   /*-------------------------------*/
4788   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4789   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4790 
4791   /* post the Isend of j-structure */
4792   /*--------------------------------*/
4793   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4794 
4795   for (proc=0, k=0; proc<size; proc++) {
4796     if (!len_s[proc]) continue;
4797     i    = owners[proc];
4798     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4799     k++;
4800   }
4801 
4802   /* receives and sends of j-structure are complete */
4803   /*------------------------------------------------*/
4804   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4805   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4806 
4807   /* send and recv i-structure */
4808   /*---------------------------*/
4809   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4810   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4811 
4812   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4813   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4814   for (proc=0,k=0; proc<size; proc++) {
4815     if (!len_s[proc]) continue;
4816     /* form outgoing message for i-structure:
4817          buf_si[0]:                 nrows to be sent
4818                [1:nrows]:           row index (global)
4819                [nrows+1:2*nrows+1]: i-structure index
4820     */
4821     /*-------------------------------------------*/
4822     nrows       = len_si[proc]/2 - 1;
4823     buf_si_i    = buf_si + nrows+1;
4824     buf_si[0]   = nrows;
4825     buf_si_i[0] = 0;
4826     nrows       = 0;
4827     for (i=owners[proc]; i<owners[proc+1]; i++) {
4828       anzi = ai[i+1] - ai[i];
4829       if (anzi) {
4830         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4831         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4832         nrows++;
4833       }
4834     }
4835     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4836     k++;
4837     buf_si += len_si[proc];
4838   }
4839 
4840   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4841   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4842 
4843   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4844   for (i=0; i<merge->nrecv; i++) {
4845     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4846   }
4847 
4848   ierr = PetscFree(len_si);CHKERRQ(ierr);
4849   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4850   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4851   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4852   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4853   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4854   ierr = PetscFree(status);CHKERRQ(ierr);
4855 
4856   /* compute a local seq matrix in each processor */
4857   /*----------------------------------------------*/
4858   /* allocate bi array and free space for accumulating nonzero column info */
4859   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4860   bi[0] = 0;
4861 
4862   /* create and initialize a linked list */
4863   nlnk = N+1;
4864   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4865 
4866   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4867   len  = ai[owners[rank+1]] - ai[owners[rank]];
4868   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4869 
4870   current_space = free_space;
4871 
4872   /* determine symbolic info for each local row */
4873   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4874 
4875   for (k=0; k<merge->nrecv; k++) {
4876     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4877     nrows       = *buf_ri_k[k];
4878     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4879     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4880   }
4881 
4882   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4883   len  = 0;
4884   for (i=0; i<m; i++) {
4885     bnzi = 0;
4886     /* add local non-zero cols of this proc's seqmat into lnk */
4887     arow  = owners[rank] + i;
4888     anzi  = ai[arow+1] - ai[arow];
4889     aj    = a->j + ai[arow];
4890     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4891     bnzi += nlnk;
4892     /* add received col data into lnk */
4893     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4894       if (i == *nextrow[k]) { /* i-th row */
4895         anzi  = *(nextai[k]+1) - *nextai[k];
4896         aj    = buf_rj[k] + *nextai[k];
4897         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4898         bnzi += nlnk;
4899         nextrow[k]++; nextai[k]++;
4900       }
4901     }
4902     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4903 
4904     /* if free space is not available, make more free space */
4905     if (current_space->local_remaining<bnzi) {
4906       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4907       nspacedouble++;
4908     }
4909     /* copy data into free space, then initialize lnk */
4910     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4911     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4912 
4913     current_space->array           += bnzi;
4914     current_space->local_used      += bnzi;
4915     current_space->local_remaining -= bnzi;
4916 
4917     bi[i+1] = bi[i] + bnzi;
4918   }
4919 
4920   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4921 
4922   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4923   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4924   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4925 
4926   /* create symbolic parallel matrix B_mpi */
4927   /*---------------------------------------*/
4928   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4929   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4930   if (n==PETSC_DECIDE) {
4931     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4932   } else {
4933     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4934   }
4935   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4936   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4937   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4938   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4939   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4940 
4941   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4942   B_mpi->assembled    = PETSC_FALSE;
4943   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4944   merge->bi           = bi;
4945   merge->bj           = bj;
4946   merge->buf_ri       = buf_ri;
4947   merge->buf_rj       = buf_rj;
4948   merge->coi          = NULL;
4949   merge->coj          = NULL;
4950   merge->owners_co    = NULL;
4951 
4952   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4953 
4954   /* attach the supporting struct to B_mpi for reuse */
4955   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4956   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4957   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4958   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4959   *mpimat = B_mpi;
4960 
4961   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4967 /*@C
4968       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4969                  matrices from each processor
4970 
4971     Collective on MPI_Comm
4972 
4973    Input Parameters:
4974 +    comm - the communicators the parallel matrix will live on
4975 .    seqmat - the input sequential matrices
4976 .    m - number of local rows (or PETSC_DECIDE)
4977 .    n - number of local columns (or PETSC_DECIDE)
4978 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4979 
4980    Output Parameter:
4981 .    mpimat - the parallel matrix generated
4982 
4983     Level: advanced
4984 
4985    Notes:
4986      The dimensions of the sequential matrix in each processor MUST be the same.
4987      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4988      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4989 @*/
4990 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4991 {
4992   PetscErrorCode ierr;
4993   PetscMPIInt    size;
4994 
4995   PetscFunctionBegin;
4996   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4997   if (size == 1) {
4998     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4999     if (scall == MAT_INITIAL_MATRIX) {
5000       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5001     } else {
5002       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5003     }
5004     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005     PetscFunctionReturn(0);
5006   }
5007   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008   if (scall == MAT_INITIAL_MATRIX) {
5009     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5010   }
5011   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5012   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5018 /*@
5019      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5020           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5021           with MatGetSize()
5022 
5023     Not Collective
5024 
5025    Input Parameters:
5026 +    A - the matrix
5027 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5028 
5029    Output Parameter:
5030 .    A_loc - the local sequential matrix generated
5031 
5032     Level: developer
5033 
5034 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5035 
5036 @*/
5037 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5038 {
5039   PetscErrorCode ierr;
5040   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5041   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5042   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5043   MatScalar      *aa=a->a,*ba=b->a,*cam;
5044   PetscScalar    *ca;
5045   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5046   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5047   PetscBool      match;
5048 
5049   PetscFunctionBegin;
5050   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5051   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5052   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5053   if (scall == MAT_INITIAL_MATRIX) {
5054     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5055     ci[0] = 0;
5056     for (i=0; i<am; i++) {
5057       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5058     }
5059     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5060     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5061     k    = 0;
5062     for (i=0; i<am; i++) {
5063       ncols_o = bi[i+1] - bi[i];
5064       ncols_d = ai[i+1] - ai[i];
5065       /* off-diagonal portion of A */
5066       for (jo=0; jo<ncols_o; jo++) {
5067         col = cmap[*bj];
5068         if (col >= cstart) break;
5069         cj[k]   = col; bj++;
5070         ca[k++] = *ba++;
5071       }
5072       /* diagonal portion of A */
5073       for (j=0; j<ncols_d; j++) {
5074         cj[k]   = cstart + *aj++;
5075         ca[k++] = *aa++;
5076       }
5077       /* off-diagonal portion of A */
5078       for (j=jo; j<ncols_o; j++) {
5079         cj[k]   = cmap[*bj++];
5080         ca[k++] = *ba++;
5081       }
5082     }
5083     /* put together the new matrix */
5084     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5085     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5086     /* Since these are PETSc arrays, change flags to free them as necessary. */
5087     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5088     mat->free_a  = PETSC_TRUE;
5089     mat->free_ij = PETSC_TRUE;
5090     mat->nonew   = 0;
5091   } else if (scall == MAT_REUSE_MATRIX) {
5092     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5093     ci = mat->i; cj = mat->j; cam = mat->a;
5094     for (i=0; i<am; i++) {
5095       /* off-diagonal portion of A */
5096       ncols_o = bi[i+1] - bi[i];
5097       for (jo=0; jo<ncols_o; jo++) {
5098         col = cmap[*bj];
5099         if (col >= cstart) break;
5100         *cam++ = *ba++; bj++;
5101       }
5102       /* diagonal portion of A */
5103       ncols_d = ai[i+1] - ai[i];
5104       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5105       /* off-diagonal portion of A */
5106       for (j=jo; j<ncols_o; j++) {
5107         *cam++ = *ba++; bj++;
5108       }
5109     }
5110   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5111   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5117 /*@C
5118      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5119 
5120     Not Collective
5121 
5122    Input Parameters:
5123 +    A - the matrix
5124 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 -    row, col - index sets of rows and columns to extract (or NULL)
5126 
5127    Output Parameter:
5128 .    A_loc - the local sequential matrix generated
5129 
5130     Level: developer
5131 
5132 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5133 
5134 @*/
5135 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5136 {
5137   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5138   PetscErrorCode ierr;
5139   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5140   IS             isrowa,iscola;
5141   Mat            *aloc;
5142   PetscBool      match;
5143 
5144   PetscFunctionBegin;
5145   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5146   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5147   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5148   if (!row) {
5149     start = A->rmap->rstart; end = A->rmap->rend;
5150     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5151   } else {
5152     isrowa = *row;
5153   }
5154   if (!col) {
5155     start = A->cmap->rstart;
5156     cmap  = a->garray;
5157     nzA   = a->A->cmap->n;
5158     nzB   = a->B->cmap->n;
5159     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5160     ncols = 0;
5161     for (i=0; i<nzB; i++) {
5162       if (cmap[i] < start) idx[ncols++] = cmap[i];
5163       else break;
5164     }
5165     imark = i;
5166     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5167     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5168     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5169   } else {
5170     iscola = *col;
5171   }
5172   if (scall != MAT_INITIAL_MATRIX) {
5173     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5174     aloc[0] = *A_loc;
5175   }
5176   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5177   *A_loc = aloc[0];
5178   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5179   if (!row) {
5180     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5181   }
5182   if (!col) {
5183     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5184   }
5185   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5186   PetscFunctionReturn(0);
5187 }
5188 
5189 #undef __FUNCT__
5190 #define __FUNCT__ "MatGetBrowsOfAcols"
5191 /*@C
5192     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5193 
5194     Collective on Mat
5195 
5196    Input Parameters:
5197 +    A,B - the matrices in mpiaij format
5198 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5199 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5200 
5201    Output Parameter:
5202 +    rowb, colb - index sets of rows and columns of B to extract
5203 -    B_seq - the sequential matrix generated
5204 
5205     Level: developer
5206 
5207 @*/
5208 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5209 {
5210   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5211   PetscErrorCode ierr;
5212   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5213   IS             isrowb,iscolb;
5214   Mat            *bseq=NULL;
5215 
5216   PetscFunctionBegin;
5217   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5218     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5219   }
5220   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5221 
5222   if (scall == MAT_INITIAL_MATRIX) {
5223     start = A->cmap->rstart;
5224     cmap  = a->garray;
5225     nzA   = a->A->cmap->n;
5226     nzB   = a->B->cmap->n;
5227     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5228     ncols = 0;
5229     for (i=0; i<nzB; i++) {  /* row < local row index */
5230       if (cmap[i] < start) idx[ncols++] = cmap[i];
5231       else break;
5232     }
5233     imark = i;
5234     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5235     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5236     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5237     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5238   } else {
5239     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5240     isrowb  = *rowb; iscolb = *colb;
5241     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5242     bseq[0] = *B_seq;
5243   }
5244   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5245   *B_seq = bseq[0];
5246   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5247   if (!rowb) {
5248     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5249   } else {
5250     *rowb = isrowb;
5251   }
5252   if (!colb) {
5253     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5254   } else {
5255     *colb = iscolb;
5256   }
5257   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
5260 
5261 #undef __FUNCT__
5262 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5263 /*
5264     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5265     of the OFF-DIAGONAL portion of local A
5266 
5267     Collective on Mat
5268 
5269    Input Parameters:
5270 +    A,B - the matrices in mpiaij format
5271 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5272 
5273    Output Parameter:
5274 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5275 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5276 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5277 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5278 
5279     Level: developer
5280 
5281 */
5282 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5283 {
5284   VecScatter_MPI_General *gen_to,*gen_from;
5285   PetscErrorCode         ierr;
5286   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5287   Mat_SeqAIJ             *b_oth;
5288   VecScatter             ctx =a->Mvctx;
5289   MPI_Comm               comm;
5290   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5291   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5292   PetscScalar            *rvalues,*svalues;
5293   MatScalar              *b_otha,*bufa,*bufA;
5294   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5295   MPI_Request            *rwaits = NULL,*swaits = NULL;
5296   MPI_Status             *sstatus,rstatus;
5297   PetscMPIInt            jj;
5298   PetscInt               *cols,sbs,rbs;
5299   PetscScalar            *vals;
5300 
5301   PetscFunctionBegin;
5302   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5303   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5304     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5305   }
5306   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5307   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5308 
5309   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5310   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5311   rvalues  = gen_from->values; /* holds the length of receiving row */
5312   svalues  = gen_to->values;   /* holds the length of sending row */
5313   nrecvs   = gen_from->n;
5314   nsends   = gen_to->n;
5315 
5316   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5317   srow    = gen_to->indices;    /* local row index to be sent */
5318   sstarts = gen_to->starts;
5319   sprocs  = gen_to->procs;
5320   sstatus = gen_to->sstatus;
5321   sbs     = gen_to->bs;
5322   rstarts = gen_from->starts;
5323   rprocs  = gen_from->procs;
5324   rbs     = gen_from->bs;
5325 
5326   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5327   if (scall == MAT_INITIAL_MATRIX) {
5328     /* i-array */
5329     /*---------*/
5330     /*  post receives */
5331     for (i=0; i<nrecvs; i++) {
5332       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5333       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5334       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5335     }
5336 
5337     /* pack the outgoing message */
5338     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5339 
5340     sstartsj[0] = 0;
5341     rstartsj[0] = 0;
5342     len         = 0; /* total length of j or a array to be sent */
5343     k           = 0;
5344     for (i=0; i<nsends; i++) {
5345       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5346       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5347       for (j=0; j<nrows; j++) {
5348         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5349         for (l=0; l<sbs; l++) {
5350           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5351 
5352           rowlen[j*sbs+l] = ncols;
5353 
5354           len += ncols;
5355           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5356         }
5357         k++;
5358       }
5359       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5360 
5361       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5362     }
5363     /* recvs and sends of i-array are completed */
5364     i = nrecvs;
5365     while (i--) {
5366       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5367     }
5368     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5369 
5370     /* allocate buffers for sending j and a arrays */
5371     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5372     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5373 
5374     /* create i-array of B_oth */
5375     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5376 
5377     b_othi[0] = 0;
5378     len       = 0; /* total length of j or a array to be received */
5379     k         = 0;
5380     for (i=0; i<nrecvs; i++) {
5381       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5382       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5383       for (j=0; j<nrows; j++) {
5384         b_othi[k+1] = b_othi[k] + rowlen[j];
5385         len        += rowlen[j]; k++;
5386       }
5387       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5388     }
5389 
5390     /* allocate space for j and a arrrays of B_oth */
5391     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5392     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5393 
5394     /* j-array */
5395     /*---------*/
5396     /*  post receives of j-array */
5397     for (i=0; i<nrecvs; i++) {
5398       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5399       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5400     }
5401 
5402     /* pack the outgoing message j-array */
5403     k = 0;
5404     for (i=0; i<nsends; i++) {
5405       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5406       bufJ  = bufj+sstartsj[i];
5407       for (j=0; j<nrows; j++) {
5408         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5409         for (ll=0; ll<sbs; ll++) {
5410           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5411           for (l=0; l<ncols; l++) {
5412             *bufJ++ = cols[l];
5413           }
5414           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5415         }
5416       }
5417       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5418     }
5419 
5420     /* recvs and sends of j-array are completed */
5421     i = nrecvs;
5422     while (i--) {
5423       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5424     }
5425     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5426   } else if (scall == MAT_REUSE_MATRIX) {
5427     sstartsj = *startsj_s;
5428     rstartsj = *startsj_r;
5429     bufa     = *bufa_ptr;
5430     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5431     b_otha   = b_oth->a;
5432   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5433 
5434   /* a-array */
5435   /*---------*/
5436   /*  post receives of a-array */
5437   for (i=0; i<nrecvs; i++) {
5438     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5439     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5440   }
5441 
5442   /* pack the outgoing message a-array */
5443   k = 0;
5444   for (i=0; i<nsends; i++) {
5445     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5446     bufA  = bufa+sstartsj[i];
5447     for (j=0; j<nrows; j++) {
5448       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5449       for (ll=0; ll<sbs; ll++) {
5450         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5451         for (l=0; l<ncols; l++) {
5452           *bufA++ = vals[l];
5453         }
5454         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5455       }
5456     }
5457     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5458   }
5459   /* recvs and sends of a-array are completed */
5460   i = nrecvs;
5461   while (i--) {
5462     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5463   }
5464   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5465   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5466 
5467   if (scall == MAT_INITIAL_MATRIX) {
5468     /* put together the new matrix */
5469     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5470 
5471     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5472     /* Since these are PETSc arrays, change flags to free them as necessary. */
5473     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5474     b_oth->free_a  = PETSC_TRUE;
5475     b_oth->free_ij = PETSC_TRUE;
5476     b_oth->nonew   = 0;
5477 
5478     ierr = PetscFree(bufj);CHKERRQ(ierr);
5479     if (!startsj_s || !bufa_ptr) {
5480       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5481       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5482     } else {
5483       *startsj_s = sstartsj;
5484       *startsj_r = rstartsj;
5485       *bufa_ptr  = bufa;
5486     }
5487   }
5488   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 #undef __FUNCT__
5493 #define __FUNCT__ "MatGetCommunicationStructs"
5494 /*@C
5495   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5496 
5497   Not Collective
5498 
5499   Input Parameters:
5500 . A - The matrix in mpiaij format
5501 
5502   Output Parameter:
5503 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5504 . colmap - A map from global column index to local index into lvec
5505 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5506 
5507   Level: developer
5508 
5509 @*/
5510 #if defined(PETSC_USE_CTABLE)
5511 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5512 #else
5513 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5514 #endif
5515 {
5516   Mat_MPIAIJ *a;
5517 
5518   PetscFunctionBegin;
5519   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5520   PetscValidPointer(lvec, 2);
5521   PetscValidPointer(colmap, 3);
5522   PetscValidPointer(multScatter, 4);
5523   a = (Mat_MPIAIJ*) A->data;
5524   if (lvec) *lvec = a->lvec;
5525   if (colmap) *colmap = a->colmap;
5526   if (multScatter) *multScatter = a->Mvctx;
5527   PetscFunctionReturn(0);
5528 }
5529 
5530 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5531 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5532 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5533 
5534 #undef __FUNCT__
5535 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5536 /*
5537     Computes (B'*A')' since computing B*A directly is untenable
5538 
5539                n                       p                          p
5540         (              )       (              )         (                  )
5541       m (      A       )  *  n (       B      )   =   m (         C        )
5542         (              )       (              )         (                  )
5543 
5544 */
5545 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5546 {
5547   PetscErrorCode ierr;
5548   Mat            At,Bt,Ct;
5549 
5550   PetscFunctionBegin;
5551   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5552   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5553   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5554   ierr = MatDestroy(&At);CHKERRQ(ierr);
5555   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5556   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5557   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5558   PetscFunctionReturn(0);
5559 }
5560 
5561 #undef __FUNCT__
5562 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5563 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5564 {
5565   PetscErrorCode ierr;
5566   PetscInt       m=A->rmap->n,n=B->cmap->n;
5567   Mat            Cmat;
5568 
5569   PetscFunctionBegin;
5570   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5571   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5572   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5573   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5574   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5575   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5576   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5577   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5578 
5579   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5580 
5581   *C = Cmat;
5582   PetscFunctionReturn(0);
5583 }
5584 
5585 /* ----------------------------------------------------------------*/
5586 #undef __FUNCT__
5587 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5588 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5589 {
5590   PetscErrorCode ierr;
5591 
5592   PetscFunctionBegin;
5593   if (scall == MAT_INITIAL_MATRIX) {
5594     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5595     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5596     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5597   }
5598   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5599   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5600   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5601   PetscFunctionReturn(0);
5602 }
5603 
5604 #if defined(PETSC_HAVE_MUMPS)
5605 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5606 #endif
5607 #if defined(PETSC_HAVE_PASTIX)
5608 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5609 #endif
5610 #if defined(PETSC_HAVE_SUPERLU_DIST)
5611 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5612 #endif
5613 #if defined(PETSC_HAVE_CLIQUE)
5614 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5615 #endif
5616 
5617 /*MC
5618    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5619 
5620    Options Database Keys:
5621 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5622 
5623   Level: beginner
5624 
5625 .seealso: MatCreateAIJ()
5626 M*/
5627 
5628 #undef __FUNCT__
5629 #define __FUNCT__ "MatCreate_MPIAIJ"
5630 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5631 {
5632   Mat_MPIAIJ     *b;
5633   PetscErrorCode ierr;
5634   PetscMPIInt    size;
5635 
5636   PetscFunctionBegin;
5637   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5638 
5639   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5640   B->data       = (void*)b;
5641   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5642   B->assembled  = PETSC_FALSE;
5643   B->insertmode = NOT_SET_VALUES;
5644   b->size       = size;
5645 
5646   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5647 
5648   /* build cache for off array entries formed */
5649   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5650 
5651   b->donotstash  = PETSC_FALSE;
5652   b->colmap      = 0;
5653   b->garray      = 0;
5654   b->roworiented = PETSC_TRUE;
5655 
5656   /* stuff used for matrix vector multiply */
5657   b->lvec  = NULL;
5658   b->Mvctx = NULL;
5659 
5660   /* stuff for MatGetRow() */
5661   b->rowindices   = 0;
5662   b->rowvalues    = 0;
5663   b->getrowactive = PETSC_FALSE;
5664 
5665   /* flexible pointer used in CUSP/CUSPARSE classes */
5666   b->spptr = NULL;
5667 
5668 #if defined(PETSC_HAVE_MUMPS)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5670 #endif
5671 #if defined(PETSC_HAVE_PASTIX)
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5673 #endif
5674 #if defined(PETSC_HAVE_SUPERLU_DIST)
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5676 #endif
5677 #if defined(PETSC_HAVE_CLIQUE)
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5679 #endif
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5693   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5694   PetscFunctionReturn(0);
5695 }
5696 
5697 #undef __FUNCT__
5698 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5699 /*@
5700      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5701          and "off-diagonal" part of the matrix in CSR format.
5702 
5703    Collective on MPI_Comm
5704 
5705    Input Parameters:
5706 +  comm - MPI communicator
5707 .  m - number of local rows (Cannot be PETSC_DECIDE)
5708 .  n - This value should be the same as the local size used in creating the
5709        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5710        calculated if N is given) For square matrices n is almost always m.
5711 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5712 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5713 .   i - row indices for "diagonal" portion of matrix
5714 .   j - column indices
5715 .   a - matrix values
5716 .   oi - row indices for "off-diagonal" portion of matrix
5717 .   oj - column indices
5718 -   oa - matrix values
5719 
5720    Output Parameter:
5721 .   mat - the matrix
5722 
5723    Level: advanced
5724 
5725    Notes:
5726        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5727        must free the arrays once the matrix has been destroyed and not before.
5728 
5729        The i and j indices are 0 based
5730 
5731        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5732 
5733        This sets local rows and cannot be used to set off-processor values.
5734 
5735        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5736        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5737        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5738        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5739        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5740        communication if it is known that only local entries will be set.
5741 
5742 .keywords: matrix, aij, compressed row, sparse, parallel
5743 
5744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5745           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5746 @*/
5747 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5748 {
5749   PetscErrorCode ierr;
5750   Mat_MPIAIJ     *maij;
5751 
5752   PetscFunctionBegin;
5753   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5754   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5755   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5756   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5757   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5758   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5759   maij = (Mat_MPIAIJ*) (*mat)->data;
5760 
5761   (*mat)->preallocated = PETSC_TRUE;
5762 
5763   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5764   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5765 
5766   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5768 
5769   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773 
5774   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5775   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5777   PetscFunctionReturn(0);
5778 }
5779 
5780 /*
5781     Special version for direct calls from Fortran
5782 */
5783 #include <petsc-private/fortranimpl.h>
5784 
5785 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5786 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5787 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5788 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5789 #endif
5790 
5791 /* Change these macros so can be used in void function */
5792 #undef CHKERRQ
5793 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5794 #undef SETERRQ2
5795 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5796 #undef SETERRQ3
5797 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5798 #undef SETERRQ
5799 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5800 
5801 #undef __FUNCT__
5802 #define __FUNCT__ "matsetvaluesmpiaij_"
5803 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5804 {
5805   Mat            mat  = *mmat;
5806   PetscInt       m    = *mm, n = *mn;
5807   InsertMode     addv = *maddv;
5808   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5809   PetscScalar    value;
5810   PetscErrorCode ierr;
5811 
5812   MatCheckPreallocated(mat,1);
5813   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5814 
5815 #if defined(PETSC_USE_DEBUG)
5816   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5817 #endif
5818   {
5819     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5820     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5821     PetscBool roworiented = aij->roworiented;
5822 
5823     /* Some Variables required in the macro */
5824     Mat        A                 = aij->A;
5825     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5826     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5827     MatScalar  *aa               = a->a;
5828     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5829     Mat        B                 = aij->B;
5830     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5831     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5832     MatScalar  *ba               = b->a;
5833 
5834     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5835     PetscInt  nonew = a->nonew;
5836     MatScalar *ap1,*ap2;
5837 
5838     PetscFunctionBegin;
5839     for (i=0; i<m; i++) {
5840       if (im[i] < 0) continue;
5841 #if defined(PETSC_USE_DEBUG)
5842       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5843 #endif
5844       if (im[i] >= rstart && im[i] < rend) {
5845         row      = im[i] - rstart;
5846         lastcol1 = -1;
5847         rp1      = aj + ai[row];
5848         ap1      = aa + ai[row];
5849         rmax1    = aimax[row];
5850         nrow1    = ailen[row];
5851         low1     = 0;
5852         high1    = nrow1;
5853         lastcol2 = -1;
5854         rp2      = bj + bi[row];
5855         ap2      = ba + bi[row];
5856         rmax2    = bimax[row];
5857         nrow2    = bilen[row];
5858         low2     = 0;
5859         high2    = nrow2;
5860 
5861         for (j=0; j<n; j++) {
5862           if (roworiented) value = v[i*n+j];
5863           else value = v[i+j*m];
5864           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5865           if (in[j] >= cstart && in[j] < cend) {
5866             col = in[j] - cstart;
5867             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5868           } else if (in[j] < 0) continue;
5869 #if defined(PETSC_USE_DEBUG)
5870           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5871 #endif
5872           else {
5873             if (mat->was_assembled) {
5874               if (!aij->colmap) {
5875                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5876               }
5877 #if defined(PETSC_USE_CTABLE)
5878               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5879               col--;
5880 #else
5881               col = aij->colmap[in[j]] - 1;
5882 #endif
5883               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5884                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5885                 col  =  in[j];
5886                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5887                 B     = aij->B;
5888                 b     = (Mat_SeqAIJ*)B->data;
5889                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5890                 rp2   = bj + bi[row];
5891                 ap2   = ba + bi[row];
5892                 rmax2 = bimax[row];
5893                 nrow2 = bilen[row];
5894                 low2  = 0;
5895                 high2 = nrow2;
5896                 bm    = aij->B->rmap->n;
5897                 ba    = b->a;
5898               }
5899             } else col = in[j];
5900             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5901           }
5902         }
5903       } else if (!aij->donotstash) {
5904         if (roworiented) {
5905           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5906         } else {
5907           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5908         }
5909       }
5910     }
5911   }
5912   PetscFunctionReturnVoid();
5913 }
5914 
5915