xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4b4eb8d3b5c67aff3d3d02420f547e6671225154)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
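/*
   A minimal usage sketch for the advice above (not taken from this file): create a MATAIJ matrix
   and call both preallocation routines so the same code works on one process or many; the call that
   does not match the underlying type is ignored.  The global sizes M and N and the per-row
   estimates 5 and 2 are placeholder values.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/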
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
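/*
   A small sketch of selecting this type at run time instead of in code; the preallocation advice
   above still applies.  It assumes the application calls MatSetFromOptions() and is then launched
   with -mat_type aijcrl; A, M, N and the per-row estimates are placeholder values.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/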
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
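/*
   A hedged calling sketch, inferred from the body below rather than from a public manual page:
   gmat is the square SeqAIJ matrix to be spread out, m is the number of rows this rank is to own,
   and the distributed matrix is returned in inmat.  The reuse call moves over new numerical
   values only; dist and mlocal are placeholder names.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     (later, after the numerical entries of gmat have changed)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/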
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal nonzeros in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal nonzeros in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each process
350 has an order-N integer array) but is fast to access.
351 */
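/*
   A small sketch of how the map built here is consulted later in this file (compare
   MatSetValues_MPIAIJ() below): a global column index gcol is translated to an index into the
   off-diagonal block, with a result of -1 meaning the column is not present; gcol and lcol are
   placeholder names.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/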
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633 
634   PetscFunctionBegin;
635   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
636 
637   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
638   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
639   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
645 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
649   PetscErrorCode ierr;
650   PetscMPIInt    n;
651   PetscInt       i,j,rstart,ncols,flg;
652   PetscInt       *row,*col;
653   PetscBool      other_disassembled;
654   PetscScalar    *val;
655 
656   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
657 
658   PetscFunctionBegin;
659   if (!aij->donotstash && !mat->nooffprocentries) {
660     while (1) {
661       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
662       if (!flg) break;
663 
664       for (i=0; i<n; ) {
665         /* Now identify the consecutive vals belonging to the same row */
666         for (j=i,rstart=row[j]; j<n; j++) {
667           if (row[j] != rstart) break;
668         }
669         if (j < n) ncols = j-i;
670         else       ncols = n-i;
671         /* Now assemble all these values with a single function call */
672         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
673 
674         i = j;
675       }
676     }
677     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
678   }
679   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
681 
682   /* determine if any processor has disassembled, if so we must
683      also disassemble ourselves, in order that we may reassemble. */
684   /*
685      if nonzero structure of submatrix B cannot change then we know that
686      no processor disassembled thus we can skip this stuff
687   */
688   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
689     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
690     if (mat->was_assembled && !other_disassembled) {
691       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
692     }
693   }
694   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
695     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
696   }
697   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
698   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
699   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
700 
701   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
702 
703   aij->rowvalues = 0;
704 
705   /* used by MatAXPY() */
706   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
707   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
708 
709   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
710   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
711 
712   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
713   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
714     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
715     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
722 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
723 {
724   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
725   PetscErrorCode ierr;
726 
727   PetscFunctionBegin;
728   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
729   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatZeroRows_MPIAIJ"
735 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
736 {
737   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
738   PetscInt      *owners = A->rmap->range;
739   PetscInt       n      = A->rmap->n;
740   PetscSF        sf;
741   PetscInt      *lrows;
742   PetscSFNode   *rrows;
743   PetscInt       r, p = 0, len = 0;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747   /* Create SF where leaves are input rows and roots are owned rows */
748   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
749   for (r = 0; r < n; ++r) lrows[r] = -1;
750   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
751   for (r = 0; r < N; ++r) {
752     const PetscInt idx   = rows[r];
753     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
754     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
755       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
756     }
757     if (A->nooffproczerorows) {
758       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
759       lrows[len++] = idx - owners[p];
760     } else {
761       rrows[r].rank = p;
762       rrows[r].index = rows[r] - owners[p];
763     }
764   }
765   if (!A->nooffproczerorows) {
766     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
767     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
768     /* Collect flags for rows to be zeroed */
769     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
770     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
771     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
772     /* Compress and put in row numbers */
773     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
774   }
775   /* fix right hand side if needed */
776   if (x && b) {
777     const PetscScalar *xx;
778     PetscScalar       *bb;
779 
780     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
781     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
782     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
783     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
785   }
786   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
787   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
788   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
789     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
790   } else if (diag != 0.0) {
791     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
792     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
793     for (r = 0; r < len; ++r) {
794       const PetscInt row = lrows[r] + A->rmap->rstart;
795       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
796     }
797     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
798     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
799   } else {
800     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
801   }
802   ierr = PetscFree(lrows);CHKERRQ(ierr);
803 
804   /* only change matrix nonzero state if pattern was allowed to be changed */
805   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
806     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
807     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
815 {
816   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
817   PetscErrorCode    ierr;
818   PetscMPIInt       n = A->rmap->n;
819   PetscInt          i,j,r,m,p = 0,len = 0;
820   PetscInt          *lrows,*owners = A->rmap->range;
821   PetscSFNode       *rrows;
822   PetscSF           sf;
823   const PetscScalar *xx;
824   PetscScalar       *bb,*mask;
825   Vec               xmask,lmask;
826   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
827   const PetscInt    *aj, *ii,*ridx;
828   PetscScalar       *aa;
829 
830   PetscFunctionBegin;
831   /* Create SF where leaves are input rows and roots are owned rows */
832   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
833   for (r = 0; r < n; ++r) lrows[r] = -1;
834   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
835   for (r = 0; r < N; ++r) {
836     const PetscInt idx   = rows[r];
837     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
838     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
839       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
840     }
841     rrows[r].rank  = p;
842     rrows[r].index = rows[r] - owners[p];
843   }
844   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
845   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
846   /* Collect flags for rows to be zeroed */
847   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
848   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
849   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
850   /* Compress and put in row numbers */
851   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
852   /* zero diagonal part of matrix */
853   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
854   /* handle off diagonal part of matrix */
855   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
856   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
857   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
858   for (i=0; i<len; i++) bb[lrows[i]] = 1;
859   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
860   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
863   if (x) {
864     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
867     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
868   }
869   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
870   /* remove zeroed rows of off diagonal matrix */
871   ii = aij->i;
872   for (i=0; i<len; i++) {
873     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
874   }
875   /* loop over all elements of off process part of matrix zeroing removed columns*/
876   if (aij->compressedrow.use) {
877     m    = aij->compressedrow.nrows;
878     ii   = aij->compressedrow.i;
879     ridx = aij->compressedrow.rindex;
880     for (i=0; i<m; i++) {
881       n  = ii[i+1] - ii[i];
882       aj = aij->j + ii[i];
883       aa = aij->a + ii[i];
884 
885       for (j=0; j<n; j++) {
886         if (PetscAbsScalar(mask[*aj])) {
887           if (b) bb[*ridx] -= *aa*xx[*aj];
888           *aa = 0.0;
889         }
890         aa++;
891         aj++;
892       }
893       ridx++;
894     }
895   } else { /* do not use compressed row format */
896     m = l->B->rmap->n;
897     for (i=0; i<m; i++) {
898       n  = ii[i+1] - ii[i];
899       aj = aij->j + ii[i];
900       aa = aij->a + ii[i];
901       for (j=0; j<n; j++) {
902         if (PetscAbsScalar(mask[*aj])) {
903           if (b) bb[i] -= *aa*xx[*aj];
904           *aa = 0.0;
905         }
906         aa++;
907         aj++;
908       }
909     }
910   }
911   if (x) {
912     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
913     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
914   }
915   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
916   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
917   ierr = PetscFree(lrows);CHKERRQ(ierr);
918 
919   /* only change matrix nonzero state if pattern was allowed to be changed */
920   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
921     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
922     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatMult_MPIAIJ"
929 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
930 {
931   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
932   PetscErrorCode ierr;
933   PetscInt       nt;
934 
935   PetscFunctionBegin;
936   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
937   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
938   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
940   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
947 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
948 {
949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
950   PetscErrorCode ierr;
951 
952   PetscFunctionBegin;
953   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 #undef __FUNCT__
958 #define __FUNCT__ "MatMultAdd_MPIAIJ"
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
974 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
975 {
976   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
977   PetscErrorCode ierr;
978   PetscBool      merged;
979 
980   PetscFunctionBegin;
981   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
982   /* do nondiagonal part */
983   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
984   if (!merged) {
985     /* send it on its way */
986     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
987     /* do local part */
988     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
989     /* receive remote parts: note this assumes the values are not actually */
990     /* added into yy until the VecScatterEnd() on the next line */
991     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
992   } else {
993     /* do local part */
994     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
995     /* send it on its way */
996     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997     /* values actually were received in the Begin() but we need to call this nop */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1005 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1006 {
1007   MPI_Comm       comm;
1008   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1009   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1010   IS             Me,Notme;
1011   PetscErrorCode ierr;
1012   PetscInt       M,N,first,last,*notme,i;
1013   PetscMPIInt    size;
1014 
1015   PetscFunctionBegin;
1016   /* Easy test: symmetric diagonal block */
1017   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1018   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1019   if (!*f) PetscFunctionReturn(0);
1020   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1022   if (size == 1) PetscFunctionReturn(0);
1023 
1024   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1025   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1026   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1027   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1028   for (i=0; i<first; i++) notme[i] = i;
1029   for (i=last; i<M; i++) notme[i-last+first] = i;
1030   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1031   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1032   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1033   Aoff = Aoffs[0];
1034   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1035   Boff = Boffs[0];
1036   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1037   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1039   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1041   ierr = PetscFree(notme);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1047 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1048 {
1049   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1050   PetscErrorCode ierr;
1051 
1052   PetscFunctionBegin;
1053   /* do nondiagonal part */
1054   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1055   /* send it on its way */
1056   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   /* do local part */
1058   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1059   /* receive remote parts */
1060   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 /*
1065   This only works correctly for square matrices where the subblock A->A is the
1066    diagonal block
1067 */
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1070 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatScale_MPIAIJ"
1084 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1085 {
1086   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1087   PetscErrorCode ierr;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1091   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatDestroy_Redundant"
1097 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1098 {
1099   PetscErrorCode ierr;
1100   Mat_Redundant  *redund = *redundant;
1101   PetscInt       i;
1102 
1103   PetscFunctionBegin;
1104   *redundant = NULL;
1105   if (redund){
1106     if (redund->matseq) { /* via MatGetSubMatrices()  */
1107       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1108       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1109       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1110       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1111     } else {
1112       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1113       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1114       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1115       for (i=0; i<redund->nrecvs; i++) {
1116         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1117         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1118       }
1119       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1120     }
1121 
1122     if (redund->psubcomm) {
1123       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1124     }
1125     ierr = PetscFree(redund);CHKERRQ(ierr);
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatDestroy_MPIAIJ"
1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1133 {
1134   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138 #if defined(PETSC_USE_LOG)
1139   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1140 #endif
1141   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1142   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1143   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1144   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1145   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1146 #if defined(PETSC_USE_CTABLE)
1147   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1148 #else
1149   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1150 #endif
1151   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1152   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1153   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1154   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1155   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1156   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1157 
1158   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1172 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1173 {
1174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1175   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1176   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1177   PetscErrorCode ierr;
1178   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1179   int            fd;
1180   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1181   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1182   PetscScalar    *column_values;
1183   PetscInt       message_count,flowcontrolcount;
1184   FILE           *file;
1185 
1186   PetscFunctionBegin;
1187   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1188   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1189   nz   = A->nz + B->nz;
1190   if (!rank) {
1191     header[0] = MAT_FILE_CLASSID;
1192     header[1] = mat->rmap->N;
1193     header[2] = mat->cmap->N;
1194 
1195     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1196     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1197     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1198     /* get largest number of rows any processor has */
1199     rlen  = mat->rmap->n;
1200     range = mat->rmap->range;
1201     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1202   } else {
1203     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1204     rlen = mat->rmap->n;
1205   }
1206 
1207   /* load up the local row counts */
1208   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1209   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1210 
1211   /* store the row lengths to the file */
1212   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1213   if (!rank) {
1214     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1215     for (i=1; i<size; i++) {
1216       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1217       rlen = range[i+1] - range[i];
1218       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1226   }
1227   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1228 
1229   /* load up the local column indices */
1230   nzmax = nz; /* rank 0 needs as much space as the largest processor needs */
1231   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1233   cnt   = 0;
1234   for (i=0; i<mat->rmap->n; i++) {
1235     for (j=B->i[i]; j<B->i[i+1]; j++) {
1236       if ((col = garray[B->j[j]]) > cstart) break;
1237       column_indices[cnt++] = col;
1238     }
1239     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1240     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1241   }
1242   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1243 
1244   /* store the column indices to the file */
1245   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1246   if (!rank) {
1247     MPI_Status status;
1248     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1249     for (i=1; i<size; i++) {
1250       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1251       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1252       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1253       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1255     }
1256     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1257   } else {
1258     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1259     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1262   }
1263   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1264 
1265   /* load up the local column values */
1266   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1267   cnt  = 0;
1268   for (i=0; i<mat->rmap->n; i++) {
1269     for (j=B->i[i]; j<B->i[i+1]; j++) {
1270       if (garray[B->j[j]] > cstart) break;
1271       column_values[cnt++] = B->a[j];
1272     }
1273     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1274     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1275   }
1276   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1277 
1278   /* store the column values to the file */
1279   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1280   if (!rank) {
1281     MPI_Status status;
1282     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1283     for (i=1; i<size; i++) {
1284       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1285       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1286       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1287       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1289     }
1290     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1291   } else {
1292     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1293     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1294     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1296   }
1297   ierr = PetscFree(column_values);CHKERRQ(ierr);
1298 
1299   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1300   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #include <petscdraw.h>
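/*
   The ASCII info formats print per-process and summary information and return early; binary
   output is delegated to MatView_MPIAIJ_Binary() (or handled by the sequential viewer when
   size == 1).  For all other cases the entire matrix is assembled onto the first process and
   viewed there with MatView_SeqAIJ(), which is memory intensive for large matrices.
*/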
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1307 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1308 {
1309   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1310   PetscErrorCode    ierr;
1311   PetscMPIInt       rank = aij->rank,size = aij->size;
1312   PetscBool         isdraw,iascii,isbinary;
1313   PetscViewer       sviewer;
1314   PetscViewerFormat format;
1315 
1316   PetscFunctionBegin;
1317   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1318   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1319   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1320   if (iascii) {
1321     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1322     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1323       MatInfo   info;
1324       PetscBool inodes;
1325 
1326       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1327       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1330       if (!inodes) {
1331         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1332                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1333       } else {
1334         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1335                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1336       }
1337       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1338       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1339       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1340       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1341       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1342       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1343       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1344       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1347       PetscInt inodecount,inodelimit,*inodes;
1348       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1349       if (inodes) {
1350         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1351       } else {
1352         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1353       }
1354       PetscFunctionReturn(0);
1355     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1356       PetscFunctionReturn(0);
1357     }
1358   } else if (isbinary) {
1359     if (size == 1) {
1360       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1361       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1362     } else {
1363       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1364     }
1365     PetscFunctionReturn(0);
1366   } else if (isdraw) {
1367     PetscDraw draw;
1368     PetscBool isnull;
1369     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1370     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1371   }
1372 
1373   {
1374     /* assemble the entire matrix onto the first processor */
1375     Mat        A;
1376     Mat_SeqAIJ *Aloc;
1377     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1378     MatScalar  *a;
1379     const char *matname;
1380 
1381     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1382     if (!rank) {
1383       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1386     }
1387     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1388     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1389     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1390     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1391     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1392 
1393     /* copy over the A part */
1394     Aloc = (Mat_SeqAIJ*)aij->A->data;
1395     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1396     row  = mat->rmap->rstart;
1397     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1398     for (i=0; i<m; i++) {
1399       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1400       row++;
1401       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1402     }
1403     aj = Aloc->j;
1404     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1405 
1406     /* copy over the B part */
1407     Aloc = (Mat_SeqAIJ*)aij->B->data;
1408     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1409     row  = mat->rmap->rstart;
1410     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1411     ct   = cols;
1412     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1413     for (i=0; i<m; i++) {
1414       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1415       row++;
1416       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1417     }
1418     ierr = PetscFree(ct);CHKERRQ(ierr);
1419     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1420     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1421     /*
1422        Every process must participate in this call since the graphics waits are
1423        synchronized across all processes that share the PetscDraw object
1424     */
1425     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1426     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1427     if (!rank) {
1428       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1429       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1430     }
1431     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1432     ierr = MatDestroy(&A);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatView_MPIAIJ"
1439 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1440 {
1441   PetscErrorCode ierr;
1442   PetscBool      iascii,isdraw,issocket,isbinary;
1443 
1444   PetscFunctionBegin;
1445   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1446   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1447   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1448   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1449   if (iascii || isdraw || isbinary || issocket) {
1450     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
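/*
   Parallel SOR is approximated by local (processor-block) sweeps: the ghost values of xx are
   gathered into lvec, the right-hand side is modified to bb1 = bb - B*lvec so that the
   off-process coupling enters only through the rhs, and SOR is then applied to the local
   diagonal block A.  SOR_EISENSTAT additionally applies Eisenstat's trick using the stored
   diagonal.
*/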
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatSOR_MPIAIJ"
1457 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1458 {
1459   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1460   PetscErrorCode ierr;
1461   Vec            bb1 = 0;
1462   PetscBool      hasop;
1463 
1464   PetscFunctionBegin;
1465   if (flag == SOR_APPLY_UPPER) {
1466     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1467     PetscFunctionReturn(0);
1468   }
1469 
1470   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1471     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1472   }
1473 
1474   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1477       its--;
1478     }
1479 
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1508     if (flag & SOR_ZERO_INITIAL_GUESS) {
1509       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1510       its--;
1511     }
1512     while (its--) {
1513       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515 
1516       /* update rhs: bb1 = bb - B*x */
1517       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1518       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1519 
1520       /* local sweep */
1521       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1522     }
1523   } else if (flag & SOR_EISENSTAT) {
1524     Vec xx1;
1525 
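    /* Eisenstat's trick, in outline: a backward sweep on the diagonal block produces xx, the
       ghost values of xx are gathered, the modified right-hand side
           bb1 = ((omega-2)/omega)*D*xx + bb + B*x_ghost
       is formed from the stored diagonal D, and a forward sweep with zero initial guess yields
       the correction xx1, which is added to xx. */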
1526     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1527     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1528 
1529     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531     if (!mat->diag) {
1532       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1533       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1534     }
1535     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1536     if (hasop) {
1537       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1538     } else {
1539       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1540     }
1541     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1542 
1543     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1544 
1545     /* local sweep */
1546     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1547     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1548     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1549   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1550 
1551   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
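/*
   The permutations arrive as the desired global destination of each locally owned row (rowp)
   and column (colp).  PetscSF reductions invert these maps so every process learns the new
   global indices of its rows (rdest), its columns (cdest), and its ghost columns (gcdest); the
   row star forest is then reused to communicate the per-row diagonal/off-diagonal nonzero
   counts for preallocating the permuted matrix, which is finally filled with MatSetValues().
*/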
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatPermute_MPIAIJ"
1557 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1558 {
1559   Mat            aA,aB,Aperm;
1560   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1561   PetscScalar    *aa,*ba;
1562   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1563   PetscSF        rowsf,sf;
1564   IS             parcolp = NULL;
1565   PetscBool      done;
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1570   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1571   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1572   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1573 
1574   /* Invert row permutation to find out where my rows should go */
1575   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1576   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1577   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1578   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1579   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1580   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1581 
1582   /* Invert column permutation to find out where my columns should go */
1583   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1584   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1585   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1586   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1587   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1588   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1590 
1591   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1594 
1595   /* Find out where my gcols should go */
1596   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1597   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1601   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1602   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1603   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1604 
1605   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1606   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1607   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1608   for (i=0; i<m; i++) {
1609     PetscInt row = rdest[i],rowner;
1610     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1611     for (j=ai[i]; j<ai[i+1]; j++) {
1612       PetscInt cowner,col = cdest[aj[j]];
1613       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1614       if (rowner == cowner) dnnz[i]++;
1615       else onnz[i]++;
1616     }
1617     for (j=bi[i]; j<bi[i+1]; j++) {
1618       PetscInt cowner,col = gcdest[bj[j]];
1619       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1620       if (rowner == cowner) dnnz[i]++;
1621       else onnz[i]++;
1622     }
1623   }
1624   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1625   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1626   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1627   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1628   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1629 
1630   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1631   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1632   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1633   for (i=0; i<m; i++) {
1634     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1635     PetscInt j0,rowlen;
1636     rowlen = ai[i+1] - ai[i];
1637     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert in batches of at most m */
1638       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1639       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1640     }
1641     rowlen = bi[i+1] - bi[i];
1642     for (j0=j=0; j<rowlen; j0=j) {
1643       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1644       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1645     }
1646   }
1647   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1650   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1651   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1653   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1654   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1655   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1656   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1657   *B = Aperm;
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1664 {
1665   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1666   Mat            A    = mat->A,B = mat->B;
1667   PetscErrorCode ierr;
1668   PetscReal      isend[5],irecv[5];
1669 
1670   PetscFunctionBegin;
1671   info->block_size = 1.0;
1672   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1673 
1674   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1675   isend[3] = info->memory;  isend[4] = info->mallocs;
1676 
1677   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1678 
1679   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1680   isend[3] += info->memory;  isend[4] += info->mallocs;
1681   if (flag == MAT_LOCAL) {
1682     info->nz_used      = isend[0];
1683     info->nz_allocated = isend[1];
1684     info->nz_unneeded  = isend[2];
1685     info->memory       = isend[3];
1686     info->mallocs      = isend[4];
1687   } else if (flag == MAT_GLOBAL_MAX) {
1688     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1689 
1690     info->nz_used      = irecv[0];
1691     info->nz_allocated = irecv[1];
1692     info->nz_unneeded  = irecv[2];
1693     info->memory       = irecv[3];
1694     info->mallocs      = irecv[4];
1695   } else if (flag == MAT_GLOBAL_SUM) {
1696     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1697 
1698     info->nz_used      = irecv[0];
1699     info->nz_allocated = irecv[1];
1700     info->nz_unneeded  = irecv[2];
1701     info->memory       = irecv[3];
1702     info->mallocs      = irecv[4];
1703   }
1704   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1705   info->fill_ratio_needed = 0;
1706   info->factor_mallocs    = 0;
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatSetOption_MPIAIJ"
1712 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1713 {
1714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   switch (op) {
1719   case MAT_NEW_NONZERO_LOCATIONS:
1720   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1721   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1722   case MAT_KEEP_NONZERO_PATTERN:
1723   case MAT_NEW_NONZERO_LOCATION_ERR:
1724   case MAT_USE_INODES:
1725   case MAT_IGNORE_ZERO_ENTRIES:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1729     break;
1730   case MAT_ROW_ORIENTED:
1731     a->roworiented = flg;
1732 
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_NEW_DIAGONALS:
1737     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1738     break;
1739   case MAT_IGNORE_OFF_PROC_ENTRIES:
1740     a->donotstash = flg;
1741     break;
1742   case MAT_SPD:
1743     A->spd_set = PETSC_TRUE;
1744     A->spd     = flg;
1745     if (flg) {
1746       A->symmetric                  = PETSC_TRUE;
1747       A->structurally_symmetric     = PETSC_TRUE;
1748       A->symmetric_set              = PETSC_TRUE;
1749       A->structurally_symmetric_set = PETSC_TRUE;
1750     }
1751     break;
1752   case MAT_SYMMETRIC:
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_STRUCTURALLY_SYMMETRIC:
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     break;
1758   case MAT_HERMITIAN:
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     break;
1761   case MAT_SYMMETRY_ETERNAL:
1762     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1763     break;
1764   default:
1765     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
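/*
   Assembles the requested (locally owned) row by merging the corresponding rows of the diagonal
   block A and the off-diagonal block B; B's compressed column indices are mapped to global
   indices through garray, and the pieces are interleaved so that the returned indices are in
   increasing global column order.  A typical calling sequence from user code is, roughly,

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... use the row ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

   Only one row may be active at a time (see the getrowactive check below).
*/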
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatGetRow_MPIAIJ"
1772 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1773 {
1774   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1775   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1776   PetscErrorCode ierr;
1777   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1778   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1779   PetscInt       *cmap,*idx_p;
1780 
1781   PetscFunctionBegin;
1782   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1783   mat->getrowactive = PETSC_TRUE;
1784 
1785   if (!mat->rowvalues && (idx || v)) {
1786     /*
1787         allocate enough space to hold information from the longest row.
1788     */
1789     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1790     PetscInt   max = 1,tmp;
1791     for (i=0; i<matin->rmap->n; i++) {
1792       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1793       if (max < tmp) max = tmp;
1794     }
1795     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1796   }
1797 
1798   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1799   lrow = row - rstart;
1800 
1801   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1802   if (!v)   {pvA = 0; pvB = 0;}
1803   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1804   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1805   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1806   nztot = nzA + nzB;
1807 
1808   cmap = mat->garray;
1809   if (v  || idx) {
1810     if (nztot) {
1811       /* Sort by increasing column numbers, assuming A and B already sorted */
1812       PetscInt imark = -1;
1813       if (v) {
1814         *v = v_p = mat->rowvalues;
1815         for (i=0; i<nzB; i++) {
1816           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1817           else break;
1818         }
1819         imark = i;
1820         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1821         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1822       }
1823       if (idx) {
1824         *idx = idx_p = mat->rowindices;
1825         if (imark > -1) {
1826           for (i=0; i<imark; i++) {
1827             idx_p[i] = cmap[cworkB[i]];
1828           }
1829         } else {
1830           for (i=0; i<nzB; i++) {
1831             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1832             else break;
1833           }
1834           imark = i;
1835         }
1836         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1837         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1838       }
1839     } else {
1840       if (idx) *idx = 0;
1841       if (v)   *v   = 0;
1842     }
1843   }
1844   *nz  = nztot;
1845   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1846   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1852 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1855 
1856   PetscFunctionBegin;
1857   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1858   aij->getrowactive = PETSC_FALSE;
1859   PetscFunctionReturn(0);
1860 }
1861 
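/*
   Norms are computed from the local A and B blocks and then combined across processes:
     NORM_FROBENIUS : sum of |a_ij|^2 over both blocks, MPI_Allreduce(SUM), then a square root;
     NORM_1         : column sums accumulated into an array of length cmap->N (B's columns are
                      mapped through garray), MPI_Allreduce(SUM), then the maximum column sum;
     NORM_INFINITY  : local row sums over A and B, then MPI_Allreduce(MAX).
   The 2-norm is not supported in parallel.
*/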
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatNorm_MPIAIJ"
1864 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1865 {
1866   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1867   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1868   PetscErrorCode ierr;
1869   PetscInt       i,j,cstart = mat->cmap->rstart;
1870   PetscReal      sum = 0.0;
1871   MatScalar      *v;
1872 
1873   PetscFunctionBegin;
1874   if (aij->size == 1) {
1875     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1876   } else {
1877     if (type == NORM_FROBENIUS) {
1878       v = amat->a;
1879       for (i=0; i<amat->nz; i++) {
1880         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1881       }
1882       v = bmat->a;
1883       for (i=0; i<bmat->nz; i++) {
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885       }
1886       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       *norm = PetscSqrtReal(*norm);
1888     } else if (type == NORM_1) { /* max column norm */
1889       PetscReal *tmp,*tmp2;
1890       PetscInt  *jj,*garray = aij->garray;
1891       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1892       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1893       *norm = 0.0;
1894       v     = amat->a; jj = amat->j;
1895       for (j=0; j<amat->nz; j++) {
1896         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1897       }
1898       v = bmat->a; jj = bmat->j;
1899       for (j=0; j<bmat->nz; j++) {
1900         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1901       }
1902       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1903       for (j=0; j<mat->cmap->N; j++) {
1904         if (tmp2[j] > *norm) *norm = tmp2[j];
1905       }
1906       ierr = PetscFree(tmp);CHKERRQ(ierr);
1907       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1908     } else if (type == NORM_INFINITY) { /* max row norm */
1909       PetscReal ntemp = 0.0;
1910       for (j=0; j<aij->A->rmap->n; j++) {
1911         v   = amat->a + amat->i[j];
1912         sum = 0.0;
1913         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1914           sum += PetscAbsScalar(*v); v++;
1915         }
1916         v = bmat->a + bmat->i[j];
1917         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         if (sum > ntemp) ntemp = sum;
1921       }
1922       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1923     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
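/*
   For MAT_INITIAL_MATRIX the preallocation of the transpose is computed first: counting the
   nonzeros per local column of A gives d_nnz, and the per-column counts of B are summed onto
   the owning processes of those global columns with a PetscSF reduction to give o_nnz.  The
   entries are then inserted with MatSetValues(B,ncol,cols,1,&row,...), i.e. each row of A
   becomes a column of the transpose.
*/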
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatTranspose_MPIAIJ"
1930 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1931 {
1932   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1933   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1936   PetscInt       cstart = A->cmap->rstart,ncol;
1937   Mat            B;
1938   MatScalar      *array;
1939 
1940   PetscFunctionBegin;
1941   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1942 
1943   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1944   ai = Aloc->i; aj = Aloc->j;
1945   bi = Bloc->i; bj = Bloc->j;
1946   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1947     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1948     PetscSFNode          *oloc;
1949     PETSC_UNUSED PetscSF sf;
1950 
1951     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1952     /* compute d_nnz for preallocation */
1953     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1954     for (i=0; i<ai[ma]; i++) {
1955       d_nnz[aj[i]]++;
1956       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1957     }
1958     /* compute local off-diagonal contributions */
1959     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1960     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1961     /* map those to global */
1962     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1963     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1964     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1965     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1966     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1967     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1968     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1969 
1970     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1971     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1972     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1973     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1974     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1975     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1976   } else {
1977     B    = *matout;
1978     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1979     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1980   }
1981 
1982   /* copy over the A part */
1983   array = Aloc->a;
1984   row   = A->rmap->rstart;
1985   for (i=0; i<ma; i++) {
1986     ncol = ai[i+1]-ai[i];
1987     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1988     row++;
1989     array += ncol; aj += ncol;
1990   }
1991   aj = Aloc->j;
1992   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1993 
1994   /* copy over the B part */
1995   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1996   array = Bloc->a;
1997   row   = A->rmap->rstart;
1998   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1999   cols_tmp = cols;
2000   for (i=0; i<mb; i++) {
2001     ncol = bi[i+1]-bi[i];
2002     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2003     row++;
2004     array += ncol; cols_tmp += ncol;
2005   }
2006   ierr = PetscFree(cols);CHKERRQ(ierr);
2007 
2008   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2009   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2010   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2011     *matout = B;
2012   } else {
2013     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2014   }
2015   PetscFunctionReturn(0);
2016 }
2017 
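/*
   Computes mat = diag(ll)*mat*diag(rr).  Left scaling is purely local and is applied to both
   blocks; right scaling of the off-diagonal block needs the ghost entries of rr, which are
   gathered with the matrix's scatter while the diagonal block is being scaled, overlapping
   communication with computation.
*/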
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2020 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat            a    = aij->A,b = aij->B;
2024   PetscErrorCode ierr;
2025   PetscInt       s1,s2,s3;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2029   if (rr) {
2030     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2031     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2032     /* Overlap communication with computation. */
2033     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2034   }
2035   if (ll) {
2036     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2037     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2038     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2039   }
2040   /* scale the diagonal block */
2041   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2042 
2043   if (rr) {
2044     /* Do a scatter end and then right scale the off-diagonal block */
2045     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2046     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2053 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2054 {
2055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatEqual_MPIAIJ"
2065 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2066 {
2067   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2068   Mat            a,b,c,d;
2069   PetscBool      flg;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   a = matA->A; b = matA->B;
2074   c = matB->A; d = matB->B;
2075 
2076   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2077   if (flg) {
2078     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2079   }
2080   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCopy_MPIAIJ"
2086 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2087 {
2088   PetscErrorCode ierr;
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2091 
2092   PetscFunctionBegin;
2093   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2094   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2095     /* Because of the column compression in the off-process part of the matrix a->B,
2096        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2097        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2098        could be provided by first uncompressing the a->B matrices and then copying the
2099        submatrices. */
2100     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2101   } else {
2102     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2103     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetUp_MPIAIJ"
2110 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2111 {
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 /*
2120    Computes the number of nonzeros per row needed for preallocation when X and Y
2121    have different nonzero structure.
2122 */
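/*
   The count for each row is the size of the merged (union) set of global column indices.
   For example, with xltog/yltog the identity, X columns {1,5,9} and Y columns {1,3,5} give
   nnz = 4, the size of the union {1,3,5,9}.
*/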
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2125 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2126 {
2127   PetscInt       i,j,k,nzx,nzy;
2128 
2129   PetscFunctionBegin;
2130   /* Set the number of nonzeros in the new matrix */
2131   for (i=0; i<m; i++) {
2132     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2133     nzx = xi[i+1] - xi[i];
2134     nzy = yi[i+1] - yi[i];
2135     nnz[i] = 0;
2136     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2137       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2138       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2139       nnz[i]++;
2140     }
2141     for (; k<nzy; k++) nnz[i]++;
2142   }
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2149 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2150 {
2151   PetscErrorCode ierr;
2152   PetscInt       m = Y->rmap->N;
2153   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2154   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2155 
2156   PetscFunctionBegin;
2157   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
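/*
   Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored values of the A and B blocks are combined
   directly with BLAS axpy; with SUBSET_NONZERO_PATTERN the generic MatAXPY_Basic() is used; and
   otherwise a new matrix with the merged nonzero pattern is preallocated, filled, and swapped
   into Y with MatHeaderReplace().
*/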
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatAXPY_MPIAIJ"
2163 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2164 {
2165   PetscErrorCode ierr;
2166   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2167   PetscBLASInt   bnz,one=1;
2168   Mat_SeqAIJ     *x,*y;
2169 
2170   PetscFunctionBegin;
2171   if (str == SAME_NONZERO_PATTERN) {
2172     PetscScalar alpha = a;
2173     x    = (Mat_SeqAIJ*)xx->A->data;
2174     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2175     y    = (Mat_SeqAIJ*)yy->A->data;
2176     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2177     x    = (Mat_SeqAIJ*)xx->B->data;
2178     y    = (Mat_SeqAIJ*)yy->B->data;
2179     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2180     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2181     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2182   } else if (str == SUBSET_NONZERO_PATTERN) {
2183     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2184   } else {
2185     Mat      B;
2186     PetscInt *nnz_d,*nnz_o;
2187     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2188     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2189     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2190     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2191     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2192     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2193     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2194     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2195     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2196     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2197     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2198     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2199     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2200     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatConjugate_MPIAIJ"
2209 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2210 {
2211 #if defined(PETSC_USE_COMPLEX)
2212   PetscErrorCode ierr;
2213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2217   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2218 #else
2219   PetscFunctionBegin;
2220 #endif
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatRealPart_MPIAIJ"
2226 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2227 {
2228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2229   PetscErrorCode ierr;
2230 
2231   PetscFunctionBegin;
2232   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2233   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2239 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2246   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #if defined(PETSC_HAVE_PBGL)
2251 
2252 #include <boost/parallel/mpi/bsp_process_group.hpp>
2253 #include <boost/graph/distributed/ilu_default_graph.hpp>
2254 #include <boost/graph/distributed/ilu_0_block.hpp>
2255 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2256 #include <boost/graph/distributed/petsc/interface.hpp>
2257 #include <boost/multi_array.hpp>
2258 #include <boost/parallel/distributed_property_map.hpp>
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2262 /*
2263   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2264 */
2265 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2266 {
2267   namespace petsc = boost::distributed::petsc;
2268 
2269   namespace graph_dist = boost::graph::distributed;
2270   using boost::graph::distributed::ilu_default::process_group_type;
2271   using boost::graph::ilu_permuted;
2272 
2273   PetscBool      row_identity, col_identity;
2274   PetscContainer c;
2275   PetscInt       m, n, M, N;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2280   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2281   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2282   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2283 
2284   process_group_type pg;
2285   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2286   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2287   lgraph_type& level_graph = *lgraph_p;
2288   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2289 
2290   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2291   ilu_permuted(level_graph);
2292 
2293   /* put together the new matrix */
2294   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2295   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2296   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2297   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2298   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2299   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2300   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2301   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2302 
2303   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2304   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2305   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2306   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2312 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2313 {
2314   PetscFunctionBegin;
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatSolve_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2324 {
2325   namespace graph_dist = boost::graph::distributed;
2326 
2327   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2328   lgraph_type    *lgraph_p;
2329   PetscContainer c;
2330   PetscErrorCode ierr;
2331 
2332   PetscFunctionBegin;
2333   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2334   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2335   ierr = VecCopy(b, x);CHKERRQ(ierr);
2336 
2337   PetscScalar *array_x;
2338   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2339   PetscInt sx;
2340   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2341 
2342   PetscScalar *array_b;
2343   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2344   PetscInt sb;
2345   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2346 
2347   lgraph_type& level_graph = *lgraph_p;
2348   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2349 
2350   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2351   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2352   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2353 
2354   typedef boost::iterator_property_map<array_ref_type::iterator,
2355                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2356   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2357   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2358 
2359   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2360   PetscFunctionReturn(0);
2361 }
2362 #endif
2363 
2364 
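/*
   Builds a redundant copy of the entire matrix on every subcommunicator, assuming the interlaced
   process layout.  With MAT_INITIAL_MATRIX each process packs its local rows as (row pointers,
   global column indices) in sbuf_j and values in sbuf_a, exchanges these buffers with the
   processes that hold the same subrank in the other subcommunicators (send_rank/recv_rank below,
   with extra handling for leftover processes when size is not divisible by nsubcomm), and the
   received rows are assembled into *matredundant.  With MAT_REUSE_MATRIX only the numerical
   values are repacked and resent.
*/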
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2367 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2368 {
2369   PetscMPIInt    rank,size;
2370   MPI_Comm       comm;
2371   PetscErrorCode ierr;
2372   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2373   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2374   PetscInt       *rowrange = mat->rmap->range;
2375   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2376   Mat            A = aij->A,B=aij->B,C=*matredundant;
2377   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2378   PetscScalar    *sbuf_a;
2379   PetscInt       nzlocal=a->nz+b->nz;
2380   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2381   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2382   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2383   MatScalar      *aworkA,*aworkB;
2384   PetscScalar    *vals;
2385   PetscMPIInt    tag1,tag2,tag3,imdex;
2386   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2387   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2388   MPI_Status     recv_status,*send_status;
2389   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2390   PetscInt       **rbuf_j=NULL;
2391   PetscScalar    **rbuf_a=NULL;
2392   Mat_Redundant  *redund =NULL;
2393 
2394   PetscFunctionBegin;
2395   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2396   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2398   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2399   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2400 
2401   if (reuse == MAT_REUSE_MATRIX) {
2402     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2403     if (subsize == 1) {
2404       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2405       redund = c->redundant;
2406     } else {
2407       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2408       redund = c->redundant;
2409     }
2410     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2411 
2412     nsends    = redund->nsends;
2413     nrecvs    = redund->nrecvs;
2414     send_rank = redund->send_rank;
2415     recv_rank = redund->recv_rank;
2416     sbuf_nz   = redund->sbuf_nz;
2417     rbuf_nz   = redund->rbuf_nz;
2418     sbuf_j    = redund->sbuf_j;
2419     sbuf_a    = redund->sbuf_a;
2420     rbuf_j    = redund->rbuf_j;
2421     rbuf_a    = redund->rbuf_a;
2422   }
2423 
2424   if (reuse == MAT_INITIAL_MATRIX) {
2425     PetscInt    nleftover,np_subcomm;
2426 
2427     /* get the destination processors' id send_rank, nsends and nrecvs */
2428     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2429 
2430     np_subcomm = size/nsubcomm;
2431     nleftover  = size - nsubcomm*np_subcomm;
2432 
2433     /* the block of code below is specific to the INTERLACED layout */
2434     /* ------------------------------------------------*/
2435     nsends = 0; nrecvs = 0;
2436     for (i=0; i<size; i++) {
2437       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2438         send_rank[nsends++] = i;
2439         recv_rank[nrecvs++] = i;
2440       }
2441     }
2442     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2443       i = size-nleftover-1;
2444       j = 0;
2445       while (j < nsubcomm - nleftover) {
2446         send_rank[nsends++] = i;
2447         i--; j++;
2448       }
2449     }
2450 
2451     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2452       for (i=0; i<nleftover; i++) {
2453         recv_rank[nrecvs++] = size-nleftover+i;
2454       }
2455     }
2456     /*----------------------------------------------*/
2457 
2458     /* allocate sbuf_j, sbuf_a */
2459     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2460     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2461     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2462     /*
2463     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2464     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2465      */
2466   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2467 
2468   /* copy mat's local entries into the buffers */
2469   if (reuse == MAT_INITIAL_MATRIX) {
2470     rownz_max = 0;
2471     rptr      = sbuf_j;
2472     cols      = sbuf_j + rend-rstart + 1;
2473     vals      = sbuf_a;
2474     rptr[0]   = 0;
2475     for (i=0; i<rend-rstart; i++) {
2476       row    = i + rstart;
2477       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2478       ncols  = nzA + nzB;
2479       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2480       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2481       /* load the column indices for this row into cols */
2482       lwrite = 0;
2483       for (l=0; l<nzB; l++) {
2484         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2485           vals[lwrite]   = aworkB[l];
2486           cols[lwrite++] = ctmp;
2487         }
2488       }
2489       for (l=0; l<nzA; l++) {
2490         vals[lwrite]   = aworkA[l];
2491         cols[lwrite++] = cstart + cworkA[l];
2492       }
2493       for (l=0; l<nzB; l++) {
2494         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2495           vals[lwrite]   = aworkB[l];
2496           cols[lwrite++] = ctmp;
2497         }
2498       }
2499       vals     += ncols;
2500       cols     += ncols;
2501       rptr[i+1] = rptr[i] + ncols;
2502       if (rownz_max < ncols) rownz_max = ncols;
2503     }
2504     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2505   } else { /* only copy matrix values into sbuf_a */
2506     rptr    = sbuf_j;
2507     vals    = sbuf_a;
2508     rptr[0] = 0;
2509     for (i=0; i<rend-rstart; i++) {
2510       row    = i + rstart;
2511       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2512       ncols  = nzA + nzB;
2513       cworkB = b->j + b->i[i];
2514       aworkA = a->a + a->i[i];
2515       aworkB = b->a + b->i[i];
2516       lwrite = 0;
2517       for (l=0; l<nzB; l++) {
2518         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2519       }
2520       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2521       for (l=0; l<nzB; l++) {
2522         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2523       }
2524       vals     += ncols;
2525       rptr[i+1] = rptr[i] + ncols;
2526     }
2527   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2528 
2529   /* send nzlocal to others, and recv other's nzlocal */
2530   /*--------------------------------------------------*/
2531   if (reuse == MAT_INITIAL_MATRIX) {
2532     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2533 
2534     s_waits2 = s_waits3 + nsends;
2535     s_waits1 = s_waits2 + nsends;
2536     r_waits1 = s_waits1 + nsends;
2537     r_waits2 = r_waits1 + nrecvs;
2538     r_waits3 = r_waits2 + nrecvs;
2539   } else {
2540     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2541 
2542     r_waits3 = s_waits3 + nsends;
2543   }
2544 
2545   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2546   if (reuse == MAT_INITIAL_MATRIX) {
2547     /* get new tags to keep the communication clean */
2548     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2549     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2550     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2551 
2552     /* post receives of other's nzlocal */
2553     for (i=0; i<nrecvs; i++) {
2554       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2555     }
2556     /* send nzlocal to others */
2557     for (i=0; i<nsends; i++) {
2558       sbuf_nz[i] = nzlocal;
2559       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2560     }
2561     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2562     count = nrecvs;
2563     while (count) {
2564       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2565 
2566       recv_rank[imdex] = recv_status.MPI_SOURCE;
2567       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2568       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2569 
2570       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2571 
2572       rbuf_nz[imdex] += i + 2;
2573 
2574       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2575       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2576       count--;
2577     }
2578     /* wait on sends of nzlocal */
2579     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2580     /* send mat->i,j to others, and recv from other's */
2581     /*------------------------------------------------*/
2582     for (i=0; i<nsends; i++) {
2583       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2584       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2585     }
2586     /* wait on receives of mat->i,j */
2587     /*------------------------------*/
2588     count = nrecvs;
2589     while (count) {
2590       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2591       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2592       count--;
2593     }
2594     /* wait on sends of mat->i,j */
2595     /*---------------------------*/
2596     if (nsends) {
2597       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2598     }
2599   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2600 
2601   /* post receives, send and receive mat->a */
2602   /*----------------------------------------*/
2603   for (imdex=0; imdex<nrecvs; imdex++) {
2604     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2605   }
2606   for (i=0; i<nsends; i++) {
2607     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2608   }
2609   count = nrecvs;
2610   while (count) {
2611     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2612     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2613     count--;
2614   }
2615   if (nsends) {
2616     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2617   }
2618 
2619   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2620 
2621   /* create redundant matrix */
2622   /*-------------------------*/
2623   if (reuse == MAT_INITIAL_MATRIX) {
2624     const PetscInt *range;
2625     PetscInt       rstart_sub,rend_sub,mloc_sub;
2626 
2627     /* compute rownz_max for preallocation */
2628     for (imdex=0; imdex<nrecvs; imdex++) {
2629       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2630       rptr = rbuf_j[imdex];
2631       for (i=0; i<j; i++) {
2632         ncols = rptr[i+1] - rptr[i];
2633         if (rownz_max < ncols) rownz_max = ncols;
2634       }
2635     }
2636 
2637     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2638 
2639     /* get local size of redundant matrix
2640        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types but may not be efficient! */
2641     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2642     rstart_sub = range[nsubcomm*subrank];
2643     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2644       rend_sub = range[nsubcomm*(subrank+1)];
2645     } else {
2646       rend_sub = mat->rmap->N;
2647     }
2648     mloc_sub = rend_sub - rstart_sub;
2649 
2650     if (M == N) {
2651       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2652     } else { /* non-square matrix */
2653       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2654     }
2655     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2656     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2657     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2658     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2659   } else {
2660     C = *matredundant;
2661   }
2662 
2663   /* insert local matrix entries */
2664   rptr = sbuf_j;
2665   cols = sbuf_j + rend-rstart + 1;
2666   vals = sbuf_a;
2667   for (i=0; i<rend-rstart; i++) {
2668     row   = i + rstart;
2669     ncols = rptr[i+1] - rptr[i];
2670     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2671     vals += ncols;
2672     cols += ncols;
2673   }
2674   /* insert received matrix entries */
2675   for (imdex=0; imdex<nrecvs; imdex++) {
2676     rstart = rowrange[recv_rank[imdex]];
2677     rend   = rowrange[recv_rank[imdex]+1];
2678     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2679     rptr   = rbuf_j[imdex];
2680     cols   = rbuf_j[imdex] + rend-rstart + 1;
2681     vals   = rbuf_a[imdex];
2682     for (i=0; i<rend-rstart; i++) {
2683       row   = i + rstart;
2684       ncols = rptr[i+1] - rptr[i];
2685       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2686       vals += ncols;
2687       cols += ncols;
2688     }
2689   }
2690   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2692 
2693   if (reuse == MAT_INITIAL_MATRIX) {
2694     *matredundant = C;
2695 
2696     /* create a supporting struct and attach it to C for reuse */
2697     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2698     if (subsize == 1) {
2699       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2700       c->redundant = redund;
2701     } else {
2702       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2703       c->redundant = redund;
2704     }
2705 
2706     redund->nzlocal   = nzlocal;
2707     redund->nsends    = nsends;
2708     redund->nrecvs    = nrecvs;
2709     redund->send_rank = send_rank;
2710     redund->recv_rank = recv_rank;
2711     redund->sbuf_nz   = sbuf_nz;
2712     redund->rbuf_nz   = rbuf_nz;
2713     redund->sbuf_j    = sbuf_j;
2714     redund->sbuf_a    = sbuf_a;
2715     redund->rbuf_j    = rbuf_j;
2716     redund->rbuf_a    = rbuf_a;
2717     redund->psubcomm  = NULL;
2718   }
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2724 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2725 {
2726   PetscErrorCode ierr;
2727   MPI_Comm       comm;
2728   PetscMPIInt    size,subsize;
2729   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2730   Mat_Redundant  *redund=NULL;
2731   PetscSubcomm   psubcomm=NULL;
2732   MPI_Comm       subcomm_in=subcomm;
2733   Mat            *matseq;
2734   IS             isrow,iscol;
2735 
2736   PetscFunctionBegin;
2737   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2738     if (reuse ==  MAT_INITIAL_MATRIX) {
2739       /* create psubcomm, then get subcomm */
2740       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2741       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2742       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2743 
2744       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2745       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2746       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2747       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2748       subcomm = psubcomm->comm;
2749     } else { /* retrieve psubcomm and subcomm */
2750       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2751       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2752       if (subsize == 1) {
2753         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2754         redund = c->redundant;
2755       } else {
2756         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2757         redund = c->redundant;
2758       }
2759       psubcomm = redund->psubcomm;
2760     }
2761     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2762       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2763       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2764         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2765         if (subsize == 1) {
2766           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2767           c->redundant->psubcomm = psubcomm;
2768         } else {
2769           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2770           c->redundant->psubcomm = psubcomm;
2771         }
2772       }
2773       PetscFunctionReturn(0);
2774     }
2775   }
2776 
2777   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2778   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2779   if (reuse == MAT_INITIAL_MATRIX) {
2780     /* create a local sequential matrix matseq[0] */
2781     mloc_sub = PETSC_DECIDE;
2782     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2783     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2784     rstart = rend - mloc_sub;
2785     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2786     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2787   } else { /* reuse == MAT_REUSE_MATRIX */
2788     if (subsize == 1) {
2789       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2790       redund = c->redundant;
2791     } else {
2792       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2793       redund = c->redundant;
2794     }
2795 
2796     isrow  = redund->isrow;
2797     iscol  = redund->iscol;
2798     matseq = redund->matseq;
2799   }
2800   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2801   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2802 
2803   if (reuse == MAT_INITIAL_MATRIX) {
2804     /* create a supporting struct and attach it to C for reuse */
2805     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2806     if (subsize == 1) {
2807       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2808       c->redundant = redund;
2809     } else {
2810       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2811       c->redundant = redund;
2812     }
2813     redund->isrow    = isrow;
2814     redund->iscol    = iscol;
2815     redund->matseq   = matseq;
2816     redund->psubcomm = psubcomm;
2817   }
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2823 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2824 {
2825   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2826   PetscErrorCode ierr;
2827   PetscInt       i,*idxb = 0;
2828   PetscScalar    *va,*vb;
2829   Vec            vtmp;
2830 
2831   PetscFunctionBegin;
2832   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2833   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2834   if (idx) {
2835     for (i=0; i<A->rmap->n; i++) {
2836       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2837     }
2838   }
2839 
2840   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2841   if (idx) {
2842     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2843   }
2844   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2845   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2846 
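  /* merge: for each local row keep whichever of the diagonal-block (va) and off-diagonal-block (vb)
     entries has the larger absolute value; garray maps the off-diagonal block's local column
     indices back to global column numbers */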
2847   for (i=0; i<A->rmap->n; i++) {
2848     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2849       va[i] = vb[i];
2850       if (idx) idx[i] = a->garray[idxb[i]];
2851     }
2852   }
2853 
2854   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2855   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2856   ierr = PetscFree(idxb);CHKERRQ(ierr);
2857   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2863 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2864 {
2865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2866   PetscErrorCode ierr;
2867   PetscInt       i,*idxb = 0;
2868   PetscScalar    *va,*vb;
2869   Vec            vtmp;
2870 
2871   PetscFunctionBegin;
2872   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2874   if (idx) {
2875     for (i=0; i<A->rmap->n; i++) {
2876       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2877     }
2878   }
2879 
2880   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2881   if (idx) {
2882     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2883   }
2884   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2885   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2886 
2887   for (i=0; i<A->rmap->n; i++) {
2888     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2889       va[i] = vb[i];
2890       if (idx) idx[i] = a->garray[idxb[i]];
2891     }
2892   }
2893 
2894   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2895   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2896   ierr = PetscFree(idxb);CHKERRQ(ierr);
2897   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2903 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2904 {
2905   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2906   PetscInt       n      = A->rmap->n;
2907   PetscInt       cstart = A->cmap->rstart;
2908   PetscInt       *cmap  = mat->garray;
2909   PetscInt       *diagIdx, *offdiagIdx;
2910   Vec            diagV, offdiagV;
2911   PetscScalar    *a, *diagA, *offdiagA;
2912   PetscInt       r;
2913   PetscErrorCode ierr;
2914 
2915   PetscFunctionBegin;
2916   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2917   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2918   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2919   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2920   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2921   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2922   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2923   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2924   for (r = 0; r < n; ++r) {
2925     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2926       a[r]   = diagA[r];
2927       idx[r] = cstart + diagIdx[r];
2928     } else {
2929       a[r]   = offdiagA[r];
2930       idx[r] = cmap[offdiagIdx[r]];
2931     }
2932   }
2933   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2934   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2935   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2936   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2937   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2938   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2944 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2945 {
2946   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2947   PetscInt       n      = A->rmap->n;
2948   PetscInt       cstart = A->cmap->rstart;
2949   PetscInt       *cmap  = mat->garray;
2950   PetscInt       *diagIdx, *offdiagIdx;
2951   Vec            diagV, offdiagV;
2952   PetscScalar    *a, *diagA, *offdiagA;
2953   PetscInt       r;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2958   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2959   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2960   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2961   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2962   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2963   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2964   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2965   for (r = 0; r < n; ++r) {
2966     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2967       a[r]   = diagA[r];
2968       idx[r] = cstart + diagIdx[r];
2969     } else {
2970       a[r]   = offdiagA[r];
2971       idx[r] = cmap[offdiagIdx[r]];
2972     }
2973   }
2974   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2975   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2976   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2977   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2978   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2979   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2985 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2986 {
2987   PetscErrorCode ierr;
2988   Mat            *dummy;
2989 
2990   PetscFunctionBegin;
2991   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2992   *newmat = *dummy;
2993   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2999 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3000 {
3001   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3002   PetscErrorCode ierr;
3003 
3004   PetscFunctionBegin;
3005   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3011 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3012 {
3013   PetscErrorCode ierr;
3014   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3015 
3016   PetscFunctionBegin;
3017   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3018   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3019   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3020   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3021   PetscFunctionReturn(0);
3022 }
3023 
3024 /* -------------------------------------------------------------------*/
3025 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3026                                        MatGetRow_MPIAIJ,
3027                                        MatRestoreRow_MPIAIJ,
3028                                        MatMult_MPIAIJ,
3029                                 /* 4*/ MatMultAdd_MPIAIJ,
3030                                        MatMultTranspose_MPIAIJ,
3031                                        MatMultTransposeAdd_MPIAIJ,
3032 #if defined(PETSC_HAVE_PBGL)
3033                                        MatSolve_MPIAIJ,
3034 #else
3035                                        0,
3036 #endif
3037                                        0,
3038                                        0,
3039                                 /*10*/ 0,
3040                                        0,
3041                                        0,
3042                                        MatSOR_MPIAIJ,
3043                                        MatTranspose_MPIAIJ,
3044                                 /*15*/ MatGetInfo_MPIAIJ,
3045                                        MatEqual_MPIAIJ,
3046                                        MatGetDiagonal_MPIAIJ,
3047                                        MatDiagonalScale_MPIAIJ,
3048                                        MatNorm_MPIAIJ,
3049                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3050                                        MatAssemblyEnd_MPIAIJ,
3051                                        MatSetOption_MPIAIJ,
3052                                        MatZeroEntries_MPIAIJ,
3053                                 /*24*/ MatZeroRows_MPIAIJ,
3054                                        0,
3055 #if defined(PETSC_HAVE_PBGL)
3056                                        0,
3057 #else
3058                                        0,
3059 #endif
3060                                        0,
3061                                        0,
3062                                 /*29*/ MatSetUp_MPIAIJ,
3063 #if defined(PETSC_HAVE_PBGL)
3064                                        0,
3065 #else
3066                                        0,
3067 #endif
3068                                        0,
3069                                        0,
3070                                        0,
3071                                 /*34*/ MatDuplicate_MPIAIJ,
3072                                        0,
3073                                        0,
3074                                        0,
3075                                        0,
3076                                 /*39*/ MatAXPY_MPIAIJ,
3077                                        MatGetSubMatrices_MPIAIJ,
3078                                        MatIncreaseOverlap_MPIAIJ,
3079                                        MatGetValues_MPIAIJ,
3080                                        MatCopy_MPIAIJ,
3081                                 /*44*/ MatGetRowMax_MPIAIJ,
3082                                        MatScale_MPIAIJ,
3083                                        0,
3084                                        0,
3085                                        MatZeroRowsColumns_MPIAIJ,
3086                                 /*49*/ MatSetRandom_MPIAIJ,
3087                                        0,
3088                                        0,
3089                                        0,
3090                                        0,
3091                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3092                                        0,
3093                                        MatSetUnfactored_MPIAIJ,
3094                                        MatPermute_MPIAIJ,
3095                                        0,
3096                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3097                                        MatDestroy_MPIAIJ,
3098                                        MatView_MPIAIJ,
3099                                        0,
3100                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3101                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3102                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3103                                        0,
3104                                        0,
3105                                        0,
3106                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3107                                        MatGetRowMinAbs_MPIAIJ,
3108                                        0,
3109                                        MatSetColoring_MPIAIJ,
3110                                        0,
3111                                        MatSetValuesAdifor_MPIAIJ,
3112                                 /*75*/ MatFDColoringApply_AIJ,
3113                                        0,
3114                                        0,
3115                                        0,
3116                                        MatFindZeroDiagonals_MPIAIJ,
3117                                 /*80*/ 0,
3118                                        0,
3119                                        0,
3120                                 /*83*/ MatLoad_MPIAIJ,
3121                                        0,
3122                                        0,
3123                                        0,
3124                                        0,
3125                                        0,
3126                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3127                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3128                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3129                                        MatPtAP_MPIAIJ_MPIAIJ,
3130                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3131                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3132                                        0,
3133                                        0,
3134                                        0,
3135                                        0,
3136                                 /*99*/ 0,
3137                                        0,
3138                                        0,
3139                                        MatConjugate_MPIAIJ,
3140                                        0,
3141                                 /*104*/MatSetValuesRow_MPIAIJ,
3142                                        MatRealPart_MPIAIJ,
3143                                        MatImaginaryPart_MPIAIJ,
3144                                        0,
3145                                        0,
3146                                 /*109*/0,
3147                                        MatGetRedundantMatrix_MPIAIJ,
3148                                        MatGetRowMin_MPIAIJ,
3149                                        0,
3150                                        0,
3151                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                 /*119*/0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                        MatGetMultiProcBlock_MPIAIJ,
3161                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3162                                        MatGetColumnNorms_MPIAIJ,
3163                                        MatInvertBlockDiagonal_MPIAIJ,
3164                                        0,
3165                                        MatGetSubMatricesParallel_MPIAIJ,
3166                                 /*129*/0,
3167                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3168                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3169                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3170                                        0,
3171                                 /*134*/0,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                        0,
3176                                 /*139*/0,
3177                                        0,
3178                                        0,
3179                                        MatFDColoringSetUp_MPIXAIJ
3180 };
3181 
3182 /* ----------------------------------------------------------------------------------------*/
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3186 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3187 {
3188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3189   PetscErrorCode ierr;
3190 
3191   PetscFunctionBegin;
3192   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3193   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3194   PetscFunctionReturn(0);
3195 }
3196 
3197 #undef __FUNCT__
3198 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3199 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3200 {
3201   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3202   PetscErrorCode ierr;
3203 
3204   PetscFunctionBegin;
3205   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3206   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3212 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3213 {
3214   Mat_MPIAIJ     *b;
3215   PetscErrorCode ierr;
3216 
3217   PetscFunctionBegin;
3218   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3219   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3220   b = (Mat_MPIAIJ*)B->data;
3221 
3222   if (!B->preallocated) {
3223     /* Explicitly create 2 MATSEQAIJ matrices. */
3224     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3225     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3226     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3227     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3228     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3229     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3230     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3231     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3232     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3233     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3234   }
3235 
3236   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3237   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3238   B->preallocated = PETSC_TRUE;
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3244 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3245 {
3246   Mat            mat;
3247   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3248   PetscErrorCode ierr;
3249 
3250   PetscFunctionBegin;
3251   *newmat = 0;
3252   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3253   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3254   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3255   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3256   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3257   a       = (Mat_MPIAIJ*)mat->data;
3258 
3259   mat->factortype   = matin->factortype;
3260   mat->assembled    = PETSC_TRUE;
3261   mat->insertmode   = NOT_SET_VALUES;
3262   mat->preallocated = PETSC_TRUE;
3263 
3264   a->size         = oldmat->size;
3265   a->rank         = oldmat->rank;
3266   a->donotstash   = oldmat->donotstash;
3267   a->roworiented  = oldmat->roworiented;
3268   a->rowindices   = 0;
3269   a->rowvalues    = 0;
3270   a->getrowactive = PETSC_FALSE;
3271 
3272   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3273   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3274 
3275   if (oldmat->colmap) {
3276 #if defined(PETSC_USE_CTABLE)
3277     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3278 #else
3279     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3280     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3281     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3282 #endif
3283   } else a->colmap = 0;
3284   if (oldmat->garray) {
3285     PetscInt len;
3286     len  = oldmat->B->cmap->n;
3287     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3288     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3289     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3290   } else a->garray = 0;
3291 
3292   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3293   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3294   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3295   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3296   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3297   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3298   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3299   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3300   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3301   *newmat = mat;
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 
3306 
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatLoad_MPIAIJ"
3309 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3310 {
3311   PetscScalar    *vals,*svals;
3312   MPI_Comm       comm;
3313   PetscErrorCode ierr;
3314   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3315   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3316   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3317   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3318   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3319   int            fd;
3320   PetscInt       bs = newMat->rmap->bs;
3321 
3322   PetscFunctionBegin;
3323   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3324   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3325   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3326   if (!rank) {
3327     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3328     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3329     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3330   }
3331 
3332   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3333   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3334   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3335   if (bs < 0) bs = 1;
3336 
3337   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3338 
3339   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3340   M    = header[1]; N = header[2];
3341   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3342   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3343   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3344 
3345   /* If global sizes are set, check if they are consistent with that given in the file */
3346   if (sizesset) {
3347     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3348   }
3349   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3350   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3351 
3352   /* determine ownership of all (block) rows */
3353   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3354   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3355   else m = newMat->rmap->n; /* Set by user */
3356 
3357   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3358   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3359 
3360   /* First process needs enough room for process with most rows */
3361   if (!rank) {
3362     mmax = rowners[1];
3363     for (i=2; i<=size; i++) {
3364       mmax = PetscMax(mmax, rowners[i]);
3365     }
3366   } else mmax = -1;             /* unused, but compilers complain */
3367 
3368   rowners[0] = 0;
3369   for (i=2; i<=size; i++) {
3370     rowners[i] += rowners[i-1];
3371   }
3372   rstart = rowners[rank];
3373   rend   = rowners[rank+1];
3374 
3375   /* distribute row lengths to all processors */
3376   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3377   if (!rank) {
3378     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3379     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3380     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3381     for (j=0; j<m; j++) {
3382       procsnz[0] += ourlens[j];
3383     }
3384     for (i=1; i<size; i++) {
3385       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3386       /* calculate the number of nonzeros on each processor */
3387       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3388         procsnz[i] += rowlengths[j];
3389       }
3390       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3391     }
3392     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3393   } else {
3394     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3395   }
3396 
3397   if (!rank) {
3398     /* determine max buffer needed and allocate it */
3399     maxnz = 0;
3400     for (i=0; i<size; i++) {
3401       maxnz = PetscMax(maxnz,procsnz[i]);
3402     }
3403     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3404 
3405     /* read in my part of the matrix column indices  */
3406     nz   = procsnz[0];
3407     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3408     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3409 
3410     /* read in everyone else's and ship them off */
3411     for (i=1; i<size; i++) {
3412       nz   = procsnz[i];
3413       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3414       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3415     }
3416     ierr = PetscFree(cols);CHKERRQ(ierr);
3417   } else {
3418     /* determine buffer space needed for message */
3419     nz = 0;
3420     for (i=0; i<m; i++) {
3421       nz += ourlens[i];
3422     }
3423     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3424 
3425     /* receive message of column indices*/
3426     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3427   }
3428 
3429   /* determine column ownership if matrix is not square */
3430   if (N != M) {
3431     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3432     else n = newMat->cmap->n;
3433     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3434     cstart = cend - n;
3435   } else {
3436     cstart = rstart;
3437     cend   = rend;
3438     n      = cend - cstart;
3439   }
3440 
3441   /* loop over local rows, determining number of off diagonal entries */
3442   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3443   jj   = 0;
3444   for (i=0; i<m; i++) {
3445     for (j=0; j<ourlens[i]; j++) {
3446       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3447       jj++;
3448     }
3449   }
3450 
3451   for (i=0; i<m; i++) {
3452     ourlens[i] -= offlens[i];
3453   }
3454   if (!sizesset) {
3455     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3456   }
3457 
3458   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3459 
3460   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3461 
3462   for (i=0; i<m; i++) {
3463     ourlens[i] += offlens[i];
3464   }
3465 
3466   if (!rank) {
3467     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3468 
3469     /* read in my part of the matrix numerical values  */
3470     nz   = procsnz[0];
3471     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3472 
3473     /* insert into matrix */
3474     jj      = rstart;
3475     smycols = mycols;
3476     svals   = vals;
3477     for (i=0; i<m; i++) {
3478       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3479       smycols += ourlens[i];
3480       svals   += ourlens[i];
3481       jj++;
3482     }
3483 
3484     /* read in other processors and ship out */
3485     for (i=1; i<size; i++) {
3486       nz   = procsnz[i];
3487       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3488       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3489     }
3490     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3491   } else {
3492     /* receive numeric values */
3493     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3494 
3495     /* receive message of values*/
3496     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3497 
3498     /* insert into matrix */
3499     jj      = rstart;
3500     smycols = mycols;
3501     svals   = vals;
3502     for (i=0; i<m; i++) {
3503       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3504       smycols += ourlens[i];
3505       svals   += ourlens[i];
3506       jj++;
3507     }
3508   }
3509   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3510   ierr = PetscFree(vals);CHKERRQ(ierr);
3511   ierr = PetscFree(mycols);CHKERRQ(ierr);
3512   ierr = PetscFree(rowners);CHKERRQ(ierr);
3513   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3514   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3515   PetscFunctionReturn(0);
3516 }
3517 
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3520 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3521 {
3522   PetscErrorCode ierr;
3523   IS             iscol_local;
3524   PetscInt       csize;
3525 
3526   PetscFunctionBegin;
3527   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3528   if (call == MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3530     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531   } else {
3532     PetscInt cbs;
3533     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3534     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3535     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3536   }
3537   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3538   if (call == MAT_INITIAL_MATRIX) {
3539     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3540     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3541   }
3542   PetscFunctionReturn(0);
3543 }
3544 
3545 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3548 /*
3549     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
3550   and then the final result obtained by concatenating the local matrices.
3551   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3552 
3553   Note: This requires a sequential iscol with all indices.
3554 */
3555 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   PetscMPIInt    rank,size;
3559   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3560   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3561   PetscBool      allcolumns, colflag;
3562   Mat            M,Mreuse;
3563   MatScalar      *vwork,*aa;
3564   MPI_Comm       comm;
3565   Mat_SeqAIJ     *aij;
3566 
3567   PetscFunctionBegin;
3568   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3569   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3570   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3571 
3572   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3573   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3574   if (colflag && ncol == mat->cmap->N) {
3575     allcolumns = PETSC_TRUE;
3576   } else {
3577     allcolumns = PETSC_FALSE;
3578   }
3579   if (call ==  MAT_REUSE_MATRIX) {
3580     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3581     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3582     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3583   } else {
3584     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3585   }
3586 
3587   /*
3588       m - number of local rows
3589       n - number of columns (same on all processors)
3590       rstart - first row in new global matrix generated
3591   */
3592   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3593   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3594   if (call == MAT_INITIAL_MATRIX) {
3595     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3596     ii  = aij->i;
3597     jj  = aij->j;
3598 
3599     /*
3600         Determine the number of non-zeros in the diagonal and off-diagonal
3601         portions of the matrix in order to do correct preallocation
3602     */
3603 
3604     /* first get start and end of "diagonal" columns */
3605     if (csize == PETSC_DECIDE) {
3606       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3607       if (mglobal == n) { /* square matrix */
3608         nlocal = m;
3609       } else {
3610         nlocal = n/size + ((n % size) > rank);
3611       }
3612     } else {
3613       nlocal = csize;
3614     }
3615     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3616     rstart = rend - nlocal;
3617     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3618 
3619     /* next, compute all the lengths */
3620     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3621     olens = dlens + m;
3622     for (i=0; i<m; i++) {
3623       jend = ii[i+1] - ii[i];
3624       olen = 0;
3625       dlen = 0;
3626       for (j=0; j<jend; j++) {
3627         if (*jj < rstart || *jj >= rend) olen++;
3628         else dlen++;
3629         jj++;
3630       }
3631       olens[i] = olen;
3632       dlens[i] = dlen;
3633     }
3634     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3635     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3636     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3637     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3638     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3639     ierr = PetscFree(dlens);CHKERRQ(ierr);
3640   } else {
3641     PetscInt ml,nl;
3642 
3643     M    = *newmat;
3644     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3645     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3646     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3647     /*
3648          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3649        rather than the slower MatSetValues().
3650     */
3651     M->was_assembled = PETSC_TRUE;
3652     M->assembled     = PETSC_FALSE;
3653   }
3654   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3655   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3656   ii   = aij->i;
3657   jj   = aij->j;
3658   aa   = aij->a;
3659   for (i=0; i<m; i++) {
3660     row   = rstart + i;
3661     nz    = ii[i+1] - ii[i];
3662     cwork = jj;     jj += nz;
3663     vwork = aa;     aa += nz;
3664     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3665   }
3666 
3667   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3668   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669   *newmat = M;
3670 
3671   /* save submatrix used in processor for next request */
3672   if (call ==  MAT_INITIAL_MATRIX) {
3673     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3674     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3675   }
3676   PetscFunctionReturn(0);
3677 }
3678 
3679 #undef __FUNCT__
3680 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3681 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3682 {
3683   PetscInt       m,cstart, cend,j,nnz,i,d;
3684   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3685   const PetscInt *JJ;
3686   PetscScalar    *values;
3687   PetscErrorCode ierr;
3688 
3689   PetscFunctionBegin;
3690   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3691 
3692   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3693   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3694   m      = B->rmap->n;
3695   cstart = B->cmap->rstart;
3696   cend   = B->cmap->rend;
3697   rstart = B->rmap->rstart;
3698 
3699   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3700 
3701 #if defined(PETSC_USE_DEBUG)
3702   for (i=0; i<m; i++) {
3703     nnz = Ii[i+1]- Ii[i];
3704     JJ  = J + Ii[i];
3705     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3706     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3707     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3708   }
3709 #endif
3710 
3711   for (i=0; i<m; i++) {
3712     nnz     = Ii[i+1]- Ii[i];
3713     JJ      = J + Ii[i];
3714     nnz_max = PetscMax(nnz_max,nnz);
3715     d       = 0;
3716     for (j=0; j<nnz; j++) {
3717       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3718     }
3719     d_nnz[i] = d;
3720     o_nnz[i] = nnz - d;
3721   }
3722   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3723   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3724 
3725   if (v) values = (PetscScalar*)v;
3726   else {
3727     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3728   }
3729 
3730   for (i=0; i<m; i++) {
3731     ii   = i + rstart;
3732     nnz  = Ii[i+1]- Ii[i];
3733     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3734   }
3735   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3736   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3737 
3738   if (!v) {
3739     ierr = PetscFree(values);CHKERRQ(ierr);
3740   }
3741   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3742   PetscFunctionReturn(0);
3743 }
3744 
3745 #undef __FUNCT__
3746 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3747 /*@
3748    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3749    (the default parallel PETSc format).
3750 
3751    Collective on MPI_Comm
3752 
3753    Input Parameters:
3754 +  B - the matrix
3755 .  i - the indices into j for the start of each local row (starts with zero)
3756 .  j - the column indices for each local row (starts with zero)
3757 -  v - optional values in the matrix
3758 
3759    Level: developer
3760 
3761    Notes:
3762        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3763      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3764      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3765 
3766        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3767 
3768        The format used for the sparse matrix input is equivalent to a
3769     row-major ordering, i.e. for the following matrix the input data expected is
3770     as shown:
3771 
3772         1 0 0
3773         2 0 3     P0
3774        -------
3775         4 5 6     P1
3776 
3777      Process0 [P0]: rows_owned=[0,1]
3778         i =  {0,1,3}  [size = nrow+1  = 2+1]
3779         j =  {0,0,2}  [size = nz = 3]
3780         v =  {1,2,3}  [size = nz = 3]
3781 
3782      Process1 [P1]: rows_owned=[2]
3783         j =  {0,1,2}  [size = nz = 3]
3784         v =  {4,5,6}  [size = nz = 3]
3785         v =  {4,5,6}  [size = nz = 6]
3786 
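     A minimal usage sketch under the same layout (hypothetical names: comm is the parallel
     communicator, m/n the local and M/N the global sizes, and i,j,v the local CSR triplet
     shown above) might be:

.vb
     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
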
3787 .keywords: matrix, aij, compressed row, sparse, parallel
3788 
3789 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3790           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3791 @*/
3792 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3793 {
3794   PetscErrorCode ierr;
3795 
3796   PetscFunctionBegin;
3797   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 #undef __FUNCT__
3802 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3803 /*@C
3804    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3805    (the default parallel PETSc format).  For good matrix assembly performance
3806    the user should preallocate the matrix storage by setting the parameters
3807    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3808    performance can be increased by more than a factor of 50.
3809 
3810    Collective on MPI_Comm
3811 
3812    Input Parameters:
3813 +  B - the matrix
3814 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3815            (same value is used for all local rows)
3816 .  d_nnz - array containing the number of nonzeros in the various rows of the
3817            DIAGONAL portion of the local submatrix (possibly different for each row)
3818            or NULL, if d_nz is used to specify the nonzero structure.
3819            The size of this array is equal to the number of local rows, i.e 'm'.
3820            For matrices that will be factored, you must leave room for (and set)
3821            the diagonal entry even if it is zero.
3822 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3823            submatrix (same value is used for all local rows).
3824 -  o_nnz - array containing the number of nonzeros in the various rows of the
3825            OFF-DIAGONAL portion of the local submatrix (possibly different for
3826            each row) or NULL, if o_nz is used to specify the nonzero
3827            structure. The size of this array is equal to the number
3828            of local rows, i.e 'm'.
3829 
3830    If the *_nnz parameter is given then the *_nz parameter is ignored
3831 
3832    The AIJ format (also called the Yale sparse matrix format or
3833    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3834    storage.  The stored row and column indices begin with zero.
3835    See Users-Manual: ch_mat for details.
3836 
3837    The parallel matrix is partitioned such that the first m0 rows belong to
3838    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3839    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
3840 
3841    The DIAGONAL portion of the local submatrix of a processor can be defined
3842    as the submatrix which is obtained by extracting the part corresponding to
3843    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3844    first row that belongs to the processor, r2 is the last row belonging to
3845    this processor, and c1-c2 is the range of indices of the local part of a
3846    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3847    common case of a square matrix, the row and column ranges are the same and
3848    the DIAGONAL part is also square. The remaining portion of the local
3849    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3850 
3851    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3852 
3853    You can call MatGetInfo() to get information on how effective the preallocation was,
3854    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3855    You can also run with the option -info and look for messages with the string
3856    malloc in them to see if additional memory allocation was needed.
3857 
3858    Example usage:
3859 
3860    Consider the following 8x8 matrix with 34 non-zero values that is
3861    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3862    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3863    as follows:
3864 
3865 .vb
3866             1  2  0  |  0  3  0  |  0  4
3867     Proc0   0  5  6  |  7  0  0  |  8  0
3868             9  0 10  | 11  0  0  | 12  0
3869     -------------------------------------
3870            13  0 14  | 15 16 17  |  0  0
3871     Proc1   0 18  0  | 19 20 21  |  0  0
3872             0  0  0  | 22 23  0  | 24  0
3873     -------------------------------------
3874     Proc2  25 26 27  |  0  0 28  | 29  0
3875            30  0  0  | 31 32 33  |  0 34
3876 .ve
3877 
3878    This can be represented as a collection of submatrices as:
3879 
3880 .vb
3881       A B C
3882       D E F
3883       G H I
3884 .ve
3885 
3886    Where the submatrices A,B,C are owned by proc0, D,E,F are
3887    owned by proc1, G,H,I are owned by proc2.
3888 
3889    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3890    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3891    The 'M','N' parameters are 8,8, and have the same values on all procs.
3892 
3893    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3894    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3895    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3896    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3897    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3898    matrix, and [DF] as another SeqAIJ matrix.
3899 
3900    When d_nz, o_nz parameters are specified, d_nz storage elements are
3901    allocated for every row of the local diagonal submatrix, and o_nz
3902    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3903    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3904    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3905    In this case, the values of d_nz,o_nz are:
3906 .vb
3907      proc0 : d_nz = 2, o_nz = 2
3908      proc1 : d_nz = 3, o_nz = 2
3909      proc2 : d_nz = 1, o_nz = 4
3910 .ve
3911    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3912    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3913    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3914    34 values.
3915 
3916    When d_nnz, o_nnz parameters are specified, the storage is specified
3917    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3918    In the above case the values for d_nnz,o_nnz are:
3919 .vb
3920      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3921      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3922      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3923 .ve
3924    Here the space allocated is the sum of all the above values, i.e. 34, and
3925    hence the preallocation is perfect.
3926 
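   As a minimal sketch, the d_nnz/o_nnz form for proc1 in the example above could be set
   as follows (B is assumed to have already been created, sized, and given the MATMPIAIJ
   type; d_nz and o_nz are passed as 0 because the arrays take precedence):
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
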
3927    Level: intermediate
3928 
3929 .keywords: matrix, aij, compressed row, sparse, parallel
3930 
3931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3932           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3933 @*/
3934 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3935 {
3936   PetscErrorCode ierr;
3937 
3938   PetscFunctionBegin;
3939   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3940   PetscValidType(B,1);
3941   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3942   PetscFunctionReturn(0);
3943 }
3944 
3945 #undef __FUNCT__
3946 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3947 /*@
3948      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3949          the local rows in standard CSR format.
3950 
3951    Collective on MPI_Comm
3952 
3953    Input Parameters:
3954 +  comm - MPI communicator
3955 .  m - number of local rows (Cannot be PETSC_DECIDE)
3956 .  n - This value should be the same as the local size used in creating the
3957        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3958        it calculated if N is given). For square matrices n is almost always m.
3959 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3960 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3961 .   i - row indices
3962 .   j - column indices
3963 -   a - matrix values
3964 
3965    Output Parameter:
3966 .   mat - the matrix
3967 
3968    Level: intermediate
3969 
3970    Notes:
3971        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3972      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3973      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3974 
3975        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3976 
3977        The format used for the sparse matrix input is equivalent to a
3978     row-major ordering, i.e. for the following matrix, the input data expected is
3979     as shown below:
3980 
3981         1 0 0
3982         2 0 3     P0
3983        -------
3984         4 5 6     P1
3985 
3986      Process0 [P0]: rows_owned=[0,1]
3987         i =  {0,1,3}  [size = nrow+1  = 2+1]
3988         j =  {0,0,2}  [size = nz = 3]
3989         v =  {1,2,3}  [size = nz = 3]
3990 
3991      Process1 [P1]: rows_owned=[2]
3992         i =  {0,3}    [size = nrow+1  = 1+1]
3993         j =  {0,1,2}  [size = nz = 3]
3994         v =  {4,5,6}  [size = nz = 3]
3995 
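   A minimal sketch of the call made by process 0 in the example above (process 1 would
   pass its own m, i, j, and v; the communicator and sizes shown are illustrative):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
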
3996 .keywords: matrix, aij, compressed row, sparse, parallel
3997 
3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3999           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4000 @*/
4001 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4002 {
4003   PetscErrorCode ierr;
4004 
4005   PetscFunctionBegin;
4006   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4007   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4008   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4009   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4010   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4011   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4012   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatCreateAIJ"
4018 /*@C
4019    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4020    (the default parallel PETSc format).  For good matrix assembly performance
4021    the user should preallocate the matrix storage by setting the parameters
4022    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4023    performance can be increased by more than a factor of 50.
4024 
4025    Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +  comm - MPI communicator
4029 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4030            This value should be the same as the local size used in creating the
4031            y vector for the matrix-vector product y = Ax.
4032 .  n - This value should be the same as the local size used in creating the
4033        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4034        it calculated if N is given). For square matrices n is almost always m.
4035 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4036 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4037 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4038            (same value is used for all local rows)
4039 .  d_nnz - array containing the number of nonzeros in the various rows of the
4040            DIAGONAL portion of the local submatrix (possibly different for each row)
4041            or NULL, if d_nz is used to specify the nonzero structure.
4042            The size of this array is equal to the number of local rows, i.e. 'm'.
4043 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4044            submatrix (same value is used for all local rows).
4045 -  o_nnz - array containing the number of nonzeros in the various rows of the
4046            OFF-DIAGONAL portion of the local submatrix (possibly different for
4047            each row) or NULL, if o_nz is used to specify the nonzero
4048            structure. The size of this array is equal to the number
4049            of local rows, i.e. 'm'.
4050 
4051    Output Parameter:
4052 .  A - the matrix
4053 
4054    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4055    MatXXXXSetPreallocation() paradigm instead of calling this routine directly, as sketched below.
4056    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4057 
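   A sketch of that recommended paradigm, using the same argument names that are
   described on this page (error checking via CHKERRQ as usual):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
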
4058    Notes:
4059    If the *_nnz parameter is given then the *_nz parameter is ignored
4060 
4061    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4062    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4063    storage requirements for this matrix.
4064 
4065    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4066    processor then it must be used on all processors that share the object for
4067    that argument.
4068 
4069    The user MUST specify either the local or global matrix dimensions
4070    (possibly both).
4071 
4072    The parallel matrix is partitioned across processors such that the
4073    first m0 rows belong to process 0, the next m1 rows belong to
4074    process 1, the next m2 rows belong to process 2, etc., where
4075    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4076    values corresponding to an [m x N] submatrix.
4077 
4078    The columns are logically partitioned with the n0 columns belonging
4079    to the 0th partition, the next n1 columns belonging to the next
4080    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4081 
4082    The DIAGONAL portion of the local submatrix on any given processor
4083    is the submatrix corresponding to the rows and columns m,n owned by
4084    the given processor, i.e. the diagonal submatrix on
4085    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4086    etc. The remaining portion of the local submatrix [m x (N-n)]
4087    constitutes the OFF-DIAGONAL portion. The example below better
4088    illustrates this concept.
4089 
4090    For a square global matrix we define each processor's diagonal portion
4091    to be its local rows and the corresponding columns (a square submatrix);
4092    each processor's off-diagonal portion encompasses the remainder of the
4093    local matrix (a rectangular submatrix).
4094 
4095    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4096 
4097    When calling this routine with a single process communicator, a matrix of
4098    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4099    type of communicator, use the construction mechanism:
4100      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4101 
4102    By default, this format uses inodes (identical nodes) when possible.
4103    We search for consecutive rows with the same nonzero structure, thereby
4104    reusing matrix information to achieve increased efficiency.
4105 
4106    Options Database Keys:
4107 +  -mat_no_inode  - Do not use inodes
4108 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4109 -  -mat_aij_oneindex - Internally use indexing starting at 1
4110         rather than 0.  Note that when calling MatSetValues(),
4111         the user still MUST index entries starting at 0!
4112 
4113 
4114    Example usage:
4115 
4116    Consider the following 8x8 matrix with 34 non-zero values that is
4117    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4118    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4119    as follows:
4120 
4121 .vb
4122             1  2  0  |  0  3  0  |  0  4
4123     Proc0   0  5  6  |  7  0  0  |  8  0
4124             9  0 10  | 11  0  0  | 12  0
4125     -------------------------------------
4126            13  0 14  | 15 16 17  |  0  0
4127     Proc1   0 18  0  | 19 20 21  |  0  0
4128             0  0  0  | 22 23  0  | 24  0
4129     -------------------------------------
4130     Proc2  25 26 27  |  0  0 28  | 29  0
4131            30  0  0  | 31 32 33  |  0 34
4132 .ve
4133 
4134    This can be represented as a collection of submatrices as:
4135 
4136 .vb
4137       A B C
4138       D E F
4139       G H I
4140 .ve
4141 
4142    Where the submatrices A,B,C are owned by proc0, D,E,F are
4143    owned by proc1, G,H,I are owned by proc2.
4144 
4145    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4146    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4147    The 'M','N' parameters are 8,8, and have the same values on all procs.
4148 
4149    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4150    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4151    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4152    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4153    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4154    matrix, and [DF] as another SeqAIJ matrix.
4155 
4156    When d_nz, o_nz parameters are specified, d_nz storage elements are
4157    allocated for every row of the local diagonal submatrix, and o_nz
4158    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4159    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4160    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4161    In this case, the values of d_nz,o_nz are:
4162 .vb
4163      proc0 : d_nz = 2, o_nz = 2
4164      proc1 : d_nz = 3, o_nz = 2
4165      proc2 : d_nz = 1, o_nz = 4
4166 .ve
4167    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4168    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4169    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4170    34 values.
4171 
4172    When d_nnz, o_nnz parameters are specified, the storage is specified
4173    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4174    In the above case the values for d_nnz,o_nnz are:
4175 .vb
4176      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4177      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4178      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4179 .ve
4180    Here the space allocated is the sum of all the above values, i.e. 34, and
4181    hence the preallocation is perfect.
4182 
4183    Level: intermediate
4184 
4185 .keywords: matrix, aij, compressed row, sparse, parallel
4186 
4187 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4188           MPIAIJ, MatCreateMPIAIJWithArrays()
4189 @*/
4190 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4191 {
4192   PetscErrorCode ierr;
4193   PetscMPIInt    size;
4194 
4195   PetscFunctionBegin;
4196   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4197   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4199   if (size > 1) {
4200     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4201     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4202   } else {
4203     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4204     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4205   }
4206   PetscFunctionReturn(0);
4207 }
4208 
4209 #undef __FUNCT__
4210 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4211 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4212 {
4213   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4214 
4215   PetscFunctionBegin;
4216   if (Ad)     *Ad     = a->A;
4217   if (Ao)     *Ao     = a->B;
4218   if (colmap) *colmap = a->garray;
4219   PetscFunctionReturn(0);
4220 }
4221 
4222 #undef __FUNCT__
4223 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4224 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4225 {
4226   PetscErrorCode ierr;
4227   PetscInt       i;
4228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4229 
4230   PetscFunctionBegin;
4231   if (coloring->ctype == IS_COLORING_GLOBAL) {
4232     ISColoringValue *allcolors,*colors;
4233     ISColoring      ocoloring;
4234 
4235     /* set coloring for diagonal portion */
4236     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4237 
4238     /* set coloring for off-diagonal portion */
4239     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4240     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4241     for (i=0; i<a->B->cmap->n; i++) {
4242       colors[i] = allcolors[a->garray[i]];
4243     }
4244     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4245     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4246     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4247     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4248   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4249     ISColoringValue *colors;
4250     PetscInt        *larray;
4251     ISColoring      ocoloring;
4252 
4253     /* set coloring for diagonal portion */
4254     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4255     for (i=0; i<a->A->cmap->n; i++) {
4256       larray[i] = i + A->cmap->rstart;
4257     }
4258     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4259     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4260     for (i=0; i<a->A->cmap->n; i++) {
4261       colors[i] = coloring->colors[larray[i]];
4262     }
4263     ierr = PetscFree(larray);CHKERRQ(ierr);
4264     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4265     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4266     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4267 
4268     /* set coloring for off-diagonal portion */
4269     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4270     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4271     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4272     for (i=0; i<a->B->cmap->n; i++) {
4273       colors[i] = coloring->colors[larray[i]];
4274     }
4275     ierr = PetscFree(larray);CHKERRQ(ierr);
4276     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4277     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4278     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4279   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 #undef __FUNCT__
4284 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4285 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4286 {
4287   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4288   PetscErrorCode ierr;
4289 
4290   PetscFunctionBegin;
4291   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4292   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4293   PetscFunctionReturn(0);
4294 }
4295 
4296 #undef __FUNCT__
4297 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4298 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4299 {
4300   PetscErrorCode ierr;
4301   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4302   PetscInt       *indx;
4303 
4304   PetscFunctionBegin;
4305   /* This routine will ONLY return MPIAIJ type matrix */
4306   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4307   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4308   if (n == PETSC_DECIDE) {
4309     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4310   }
4311   /* Check sum(n) = N */
4312   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4313   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4314 
4315   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4316   rstart -= m;
4317 
4318   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4319   for (i=0; i<m; i++) {
4320     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4321     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4322     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4323   }
4324 
4325   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4326   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4327   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4328   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4329   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4330   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4331   PetscFunctionReturn(0);
4332 }
4333 
4334 #undef __FUNCT__
4335 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4336 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4337 {
4338   PetscErrorCode ierr;
4339   PetscInt       m,N,i,rstart,nnz,Ii;
4340   PetscInt       *indx;
4341   PetscScalar    *values;
4342 
4343   PetscFunctionBegin;
4344   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4345   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4346   for (i=0; i<m; i++) {
4347     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4348     Ii   = i + rstart;
4349     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4350     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4351   }
4352   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4353   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 #undef __FUNCT__
4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4359 /*@
4360       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4361                  matrices from each processor
4362 
4363     Collective on MPI_Comm
4364 
4365    Input Parameters:
4366 +    comm - the communicator the parallel matrix will live on
4367 .    inmat - the input sequential matrix on each process
4368 .    n - number of local columns (or PETSC_DECIDE)
4369 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4370 
4371    Output Parameter:
4372 .    outmat - the parallel matrix generated
4373 
4374     Level: advanced
4375 
4376    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4377 
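   A minimal usage sketch (Aseq is each process's sequential input matrix; the names are
   illustrative):
.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
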
4378 @*/
4379 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4380 {
4381   PetscErrorCode ierr;
4382   PetscMPIInt    size;
4383 
4384   PetscFunctionBegin;
4385   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4386   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4387   if (size == 1) {
4388     if (scall == MAT_INITIAL_MATRIX) {
4389       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4390     } else {
4391       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4392     }
4393   } else {
4394     if (scall == MAT_INITIAL_MATRIX) {
4395       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4396     }
4397     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4398   }
4399   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 #undef __FUNCT__
4404 #define __FUNCT__ "MatFileSplit"
4405 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4406 {
4407   PetscErrorCode    ierr;
4408   PetscMPIInt       rank;
4409   PetscInt          m,N,i,rstart,nnz;
4410   size_t            len;
4411   const PetscInt    *indx;
4412   PetscViewer       out;
4413   char              *name;
4414   Mat               B;
4415   const PetscScalar *values;
4416 
4417   PetscFunctionBegin;
4418   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4419   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4420   /* Should this be the type of the diagonal block of A? */
4421   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4422   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4423   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4424   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4425   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4426   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4427   for (i=0; i<m; i++) {
4428     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4429     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4430     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4431   }
4432   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4433   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4434 
4435   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4436   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4437   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4438   sprintf(name,"%s.%d",outfile,rank);
4439   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4440   ierr = PetscFree(name);CHKERRQ(ierr);
4441   ierr = MatView(B,out);CHKERRQ(ierr);
4442   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4443   ierr = MatDestroy(&B);CHKERRQ(ierr);
4444   PetscFunctionReturn(0);
4445 }
4446 
4447 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4448 #undef __FUNCT__
4449 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4450 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4451 {
4452   PetscErrorCode      ierr;
4453   Mat_Merge_SeqsToMPI *merge;
4454   PetscContainer      container;
4455 
4456   PetscFunctionBegin;
4457   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4458   if (container) {
4459     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4460     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4472     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4473     ierr = PetscFree(merge);CHKERRQ(ierr);
4474     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4475   }
4476   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4477   PetscFunctionReturn(0);
4478 }
4479 
4480 #include <../src/mat/utils/freespace.h>
4481 #include <petscbt.h>
4482 
4483 #undef __FUNCT__
4484 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4485 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4486 {
4487   PetscErrorCode      ierr;
4488   MPI_Comm            comm;
4489   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4490   PetscMPIInt         size,rank,taga,*len_s;
4491   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4492   PetscInt            proc,m;
4493   PetscInt            **buf_ri,**buf_rj;
4494   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4495   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4496   MPI_Request         *s_waits,*r_waits;
4497   MPI_Status          *status;
4498   MatScalar           *aa=a->a;
4499   MatScalar           **abuf_r,*ba_i;
4500   Mat_Merge_SeqsToMPI *merge;
4501   PetscContainer      container;
4502 
4503   PetscFunctionBegin;
4504   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4505   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4506 
4507   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4508   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4509 
4510   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4511   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4512 
4513   bi     = merge->bi;
4514   bj     = merge->bj;
4515   buf_ri = merge->buf_ri;
4516   buf_rj = merge->buf_rj;
4517 
4518   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4519   owners = merge->rowmap->range;
4520   len_s  = merge->len_s;
4521 
4522   /* send and recv matrix values */
4523   /*-----------------------------*/
4524   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4525   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4526 
4527   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4528   for (proc=0,k=0; proc<size; proc++) {
4529     if (!len_s[proc]) continue;
4530     i    = owners[proc];
4531     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4532     k++;
4533   }
4534 
4535   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4536   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4537   ierr = PetscFree(status);CHKERRQ(ierr);
4538 
4539   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4540   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4541 
4542   /* insert mat values of mpimat */
4543   /*----------------------------*/
4544   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4545   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4546 
4547   for (k=0; k<merge->nrecv; k++) {
4548     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4549     nrows       = *(buf_ri_k[k]);
4550     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4551     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4552   }
4553 
4554   /* set values of ba */
4555   m = merge->rowmap->n;
4556   for (i=0; i<m; i++) {
4557     arow = owners[rank] + i;
4558     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4559     bnzi = bi[i+1] - bi[i];
4560     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4561 
4562     /* add local non-zero vals of this proc's seqmat into ba */
4563     anzi   = ai[arow+1] - ai[arow];
4564     aj     = a->j + ai[arow];
4565     aa     = a->a + ai[arow];
4566     nextaj = 0;
4567     for (j=0; nextaj<anzi; j++) {
4568       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4569         ba_i[j] += aa[nextaj++];
4570       }
4571     }
4572 
4573     /* add received vals into ba */
4574     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4575       /* i-th row */
4576       if (i == *nextrow[k]) {
4577         anzi   = *(nextai[k]+1) - *nextai[k];
4578         aj     = buf_rj[k] + *(nextai[k]);
4579         aa     = abuf_r[k] + *(nextai[k]);
4580         nextaj = 0;
4581         for (j=0; nextaj<anzi; j++) {
4582           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4583             ba_i[j] += aa[nextaj++];
4584           }
4585         }
4586         nextrow[k]++; nextai[k]++;
4587       }
4588     }
4589     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4590   }
4591   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593 
4594   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4595   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4596   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4597   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4598   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4603 
4604 #undef __FUNCT__
4605 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4606 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4607 {
4608   PetscErrorCode      ierr;
4609   Mat                 B_mpi;
4610   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4611   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4612   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4613   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4614   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4615   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4616   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4617   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4618   MPI_Status          *status;
4619   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4620   PetscBT             lnkbt;
4621   Mat_Merge_SeqsToMPI *merge;
4622   PetscContainer      container;
4623 
4624   PetscFunctionBegin;
4625   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4626 
4627   /* make sure it is a PETSc comm */
4628   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4630   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4631 
4632   ierr = PetscNew(&merge);CHKERRQ(ierr);
4633   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4634 
4635   /* determine row ownership */
4636   /*---------------------------------------------------------*/
4637   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4638   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4639   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4640   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4641   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4642   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4643   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4644 
4645   m      = merge->rowmap->n;
4646   owners = merge->rowmap->range;
4647 
4648   /* determine the number of messages to send, their lengths */
4649   /*---------------------------------------------------------*/
4650   len_s = merge->len_s;
4651 
4652   len          = 0; /* length of buf_si[] */
4653   merge->nsend = 0;
4654   for (proc=0; proc<size; proc++) {
4655     len_si[proc] = 0;
4656     if (proc == rank) {
4657       len_s[proc] = 0;
4658     } else {
4659       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4660       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4661     }
4662     if (len_s[proc]) {
4663       merge->nsend++;
4664       nrows = 0;
4665       for (i=owners[proc]; i<owners[proc+1]; i++) {
4666         if (ai[i+1] > ai[i]) nrows++;
4667       }
4668       len_si[proc] = 2*(nrows+1);
4669       len         += len_si[proc];
4670     }
4671   }
4672 
4673   /* determine the number and length of messages to receive for ij-structure */
4674   /*-------------------------------------------------------------------------*/
4675   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4676   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4677 
4678   /* post the Irecv of j-structure */
4679   /*-------------------------------*/
4680   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4681   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4682 
4683   /* post the Isend of j-structure */
4684   /*--------------------------------*/
4685   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4686 
4687   for (proc=0, k=0; proc<size; proc++) {
4688     if (!len_s[proc]) continue;
4689     i    = owners[proc];
4690     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4691     k++;
4692   }
4693 
4694   /* receives and sends of j-structure are complete */
4695   /*------------------------------------------------*/
4696   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4697   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4698 
4699   /* send and recv i-structure */
4700   /*---------------------------*/
4701   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4702   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4703 
4704   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4705   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4706   for (proc=0,k=0; proc<size; proc++) {
4707     if (!len_s[proc]) continue;
4708     /* form outgoing message for i-structure:
4709          buf_si[0]:                 nrows to be sent
4710                [1:nrows]:           row index (local to the receiving process)
4711                [nrows+1:2*nrows+1]: i-structure index
4712     */
4713     /*-------------------------------------------*/
4714     nrows       = len_si[proc]/2 - 1;
4715     buf_si_i    = buf_si + nrows+1;
4716     buf_si[0]   = nrows;
4717     buf_si_i[0] = 0;
4718     nrows       = 0;
4719     for (i=owners[proc]; i<owners[proc+1]; i++) {
4720       anzi = ai[i+1] - ai[i];
4721       if (anzi) {
4722         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4723         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4724         nrows++;
4725       }
4726     }
4727     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4728     k++;
4729     buf_si += len_si[proc];
4730   }
4731 
4732   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4733   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4734 
4735   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4736   for (i=0; i<merge->nrecv; i++) {
4737     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4738   }
4739 
4740   ierr = PetscFree(len_si);CHKERRQ(ierr);
4741   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4742   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4743   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4744   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4745   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4746   ierr = PetscFree(status);CHKERRQ(ierr);
4747 
4748   /* compute a local seq matrix in each processor */
4749   /*----------------------------------------------*/
4750   /* allocate bi array and free space for accumulating nonzero column info */
4751   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4752   bi[0] = 0;
4753 
4754   /* create and initialize a linked list */
4755   nlnk = N+1;
4756   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4757 
4758   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4759   len  = ai[owners[rank+1]] - ai[owners[rank]];
4760   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4761 
4762   current_space = free_space;
4763 
4764   /* determine symbolic info for each local row */
4765   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4766 
4767   for (k=0; k<merge->nrecv; k++) {
4768     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4769     nrows       = *buf_ri_k[k];
4770     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4771     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4772   }
4773 
4774   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4775   len  = 0;
4776   for (i=0; i<m; i++) {
4777     bnzi = 0;
4778     /* add local non-zero cols of this proc's seqmat into lnk */
4779     arow  = owners[rank] + i;
4780     anzi  = ai[arow+1] - ai[arow];
4781     aj    = a->j + ai[arow];
4782     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4783     bnzi += nlnk;
4784     /* add received col data into lnk */
4785     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4786       if (i == *nextrow[k]) { /* i-th row */
4787         anzi  = *(nextai[k]+1) - *nextai[k];
4788         aj    = buf_rj[k] + *nextai[k];
4789         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4790         bnzi += nlnk;
4791         nextrow[k]++; nextai[k]++;
4792       }
4793     }
4794     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4795 
4796     /* if free space is not available, make more free space */
4797     if (current_space->local_remaining<bnzi) {
4798       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4799       nspacedouble++;
4800     }
4801     /* copy data into free space, then initialize lnk */
4802     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4803     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4804 
4805     current_space->array           += bnzi;
4806     current_space->local_used      += bnzi;
4807     current_space->local_remaining -= bnzi;
4808 
4809     bi[i+1] = bi[i] + bnzi;
4810   }
4811 
4812   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4813 
4814   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4815   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4816   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4817 
4818   /* create symbolic parallel matrix B_mpi */
4819   /*---------------------------------------*/
4820   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4821   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4822   if (n==PETSC_DECIDE) {
4823     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4824   } else {
4825     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4826   }
4827   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4828   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4829   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4830   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4831   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4832 
4833   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4834   B_mpi->assembled    = PETSC_FALSE;
4835   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4836   merge->bi           = bi;
4837   merge->bj           = bj;
4838   merge->buf_ri       = buf_ri;
4839   merge->buf_rj       = buf_rj;
4840   merge->coi          = NULL;
4841   merge->coj          = NULL;
4842   merge->owners_co    = NULL;
4843 
4844   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4845 
4846   /* attach the supporting struct to B_mpi for reuse */
4847   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4848   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4849   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4850   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4851   *mpimat = B_mpi;
4852 
4853   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4854   PetscFunctionReturn(0);
4855 }
4856 
4857 #undef __FUNCT__
4858 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4859 /*@C
4860      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4861                  matrices from each processor
4862 
4863     Collective on MPI_Comm
4864 
4865    Input Parameters:
4866 +    comm - the communicator the parallel matrix will live on
4867 .    seqmat - the input sequential matrix on each process
4868 .    m - number of local rows (or PETSC_DECIDE)
4869 .    n - number of local columns (or PETSC_DECIDE)
4870 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4871 
4872    Output Parameter:
4873 .    mpimat - the parallel matrix generated
4874 
4875     Level: advanced
4876 
4877    Notes:
4878      The dimensions of the sequential matrix in each processor MUST be the same.
4879      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4880      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
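
   A minimal usage sketch (Aseq is each process's sequential input matrix; the names are
   illustrative and the row/column splits are left to PETSc):
.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve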
4881 @*/
4882 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4883 {
4884   PetscErrorCode ierr;
4885   PetscMPIInt    size;
4886 
4887   PetscFunctionBegin;
4888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4889   if (size == 1) {
4890     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4891     if (scall == MAT_INITIAL_MATRIX) {
4892       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4893     } else {
4894       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4895     }
4896     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4897     PetscFunctionReturn(0);
4898   }
4899   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4900   if (scall == MAT_INITIAL_MATRIX) {
4901     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4902   }
4903   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4904   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4905   PetscFunctionReturn(0);
4906 }
4907 
4908 #undef __FUNCT__
4909 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4910 /*@
4911      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4912           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4913           with MatGetSize()
4914 
4915     Not Collective
4916 
4917    Input Parameters:
4918 +    A - the matrix
4919 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4920 
4921    Output Parameter:
4922 .    A_loc - the local sequential matrix generated
4923 
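   A minimal usage sketch (A is an assembled MPIAIJ matrix; with MAT_INITIAL_MATRIX the
   caller destroys the result when done):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc as a sequential matrix ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
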
4924     Level: developer
4925 
4926 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4927 
4928 @*/
4929 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4930 {
4931   PetscErrorCode ierr;
4932   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4933   Mat_SeqAIJ     *mat,*a,*b;
4934   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4935   MatScalar      *aa,*ba,*cam;
4936   PetscScalar    *ca;
4937   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4938   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4939   PetscBool      match;
4940   MPI_Comm       comm;
4941   PetscMPIInt    size;
4942 
4943   PetscFunctionBegin;
4944   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4945   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4946   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4947   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4948   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4949 
4950   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4951   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4952   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4953   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4954   aa = a->a; ba = b->a;
4955   if (scall == MAT_INITIAL_MATRIX) {
4956     if (size == 1) {
4957       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4958       PetscFunctionReturn(0);
4959     }
4960 
4961     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4962     ci[0] = 0;
4963     for (i=0; i<am; i++) {
4964       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4965     }
4966     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4967     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4968     k    = 0;
4969     for (i=0; i<am; i++) {
4970       ncols_o = bi[i+1] - bi[i];
4971       ncols_d = ai[i+1] - ai[i];
4972       /* off-diagonal portion of A */
4973       for (jo=0; jo<ncols_o; jo++) {
4974         col = cmap[*bj];
4975         if (col >= cstart) break;
4976         cj[k]   = col; bj++;
4977         ca[k++] = *ba++;
4978       }
4979       /* diagonal portion of A */
4980       for (j=0; j<ncols_d; j++) {
4981         cj[k]   = cstart + *aj++;
4982         ca[k++] = *aa++;
4983       }
4984       /* off-diagonal portion of A */
4985       for (j=jo; j<ncols_o; j++) {
4986         cj[k]   = cmap[*bj++];
4987         ca[k++] = *ba++;
4988       }
4989     }
4990     /* put together the new matrix */
4991     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4992     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4993     /* Since these are PETSc arrays, change flags to free them as necessary. */
4994     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4995     mat->free_a  = PETSC_TRUE;
4996     mat->free_ij = PETSC_TRUE;
4997     mat->nonew   = 0;
4998   } else if (scall == MAT_REUSE_MATRIX) {
4999     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5000     ci = mat->i; cj = mat->j; cam = mat->a;
5001     for (i=0; i<am; i++) {
5002       /* off-diagonal portion of A */
5003       ncols_o = bi[i+1] - bi[i];
5004       for (jo=0; jo<ncols_o; jo++) {
5005         col = cmap[*bj];
5006         if (col >= cstart) break;
5007         *cam++ = *ba++; bj++;
5008       }
5009       /* diagonal portion of A */
5010       ncols_d = ai[i+1] - ai[i];
5011       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5012       /* off-diagonal portion of A */
5013       for (j=jo; j<ncols_o; j++) {
5014         *cam++ = *ba++; bj++;
5015       }
5016     }
5017   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5018   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5024 /*@C
5025      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5026 
5027     Not Collective
5028 
5029    Input Parameters:
5030 +    A - the matrix
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    row, col - index sets of rows and columns to extract (or NULL)
5033 
5034    Output Parameter:
5035 .    A_loc - the local sequential matrix generated
5036 
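   A minimal usage sketch (passing NULL for row and col lets the routine construct the
   index sets itself):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve
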
5037     Level: developer
5038 
5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5040 
5041 @*/
5042 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5043 {
5044   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5045   PetscErrorCode ierr;
5046   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5047   IS             isrowa,iscola;
5048   Mat            *aloc;
5049   PetscBool      match;
5050 
5051   PetscFunctionBegin;
5052   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5053   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5054   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5055   if (!row) {
5056     start = A->rmap->rstart; end = A->rmap->rend;
5057     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5058   } else {
5059     isrowa = *row;
5060   }
5061   if (!col) {
5062     start = A->cmap->rstart;
5063     cmap  = a->garray;
5064     nzA   = a->A->cmap->n;
5065     nzB   = a->B->cmap->n;
5066     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5067     ncols = 0;
5068     for (i=0; i<nzB; i++) {
5069       if (cmap[i] < start) idx[ncols++] = cmap[i];
5070       else break;
5071     }
5072     imark = i;
5073     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5074     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5075     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5076   } else {
5077     iscola = *col;
5078   }
5079   if (scall != MAT_INITIAL_MATRIX) {
5080     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5081     aloc[0] = *A_loc;
5082   }
5083   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5084   *A_loc = aloc[0];
5085   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5086   if (!row) {
5087     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5088   }
5089   if (!col) {
5090     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5091   }
5092   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 #undef __FUNCT__
5097 #define __FUNCT__ "MatGetBrowsOfAcols"
5098 /*@C
5099     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5100 
5101     Collective on Mat
5102 
5103    Input Parameters:
5104 +    A,B - the matrices in mpiaij format
5105 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5106 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5107 
5108    Output Parameter:
5109 +    rowb, colb - index sets of rows and columns of B to extract
5110 -    B_seq - the sequential matrix generated
5111 
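   A minimal usage sketch (with MAT_INITIAL_MATRIX the routine creates the index sets and
   returns them, so they can be passed back in on a later MAT_REUSE_MATRIX call):
.vb
     IS  rowb,colb;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
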
5112     Level: developer
5113 
5114 @*/
5115 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5116 {
5117   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5118   PetscErrorCode ierr;
5119   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5120   IS             isrowb,iscolb;
5121   Mat            *bseq=NULL;
5122 
5123   PetscFunctionBegin;
5124   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5125     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5126   }
5127   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5128 
5129   if (scall == MAT_INITIAL_MATRIX) {
5130     start = A->cmap->rstart;
5131     cmap  = a->garray;
5132     nzA   = a->A->cmap->n;
5133     nzB   = a->B->cmap->n;
5134     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5135     ncols = 0;
5136     for (i=0; i<nzB; i++) {  /* B rows (= A columns) below the local column range */
5137       if (cmap[i] < start) idx[ncols++] = cmap[i];
5138       else break;
5139     }
5140     imark = i;
5141     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* B rows (= A columns) in the local column range */
5142     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* B rows (= A columns) above the local column range */
5143     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5144     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5145   } else {
5146     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5147     isrowb  = *rowb; iscolb = *colb;
5148     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5149     bseq[0] = *B_seq;
5150   }
5151   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5152   *B_seq = bseq[0];
5153   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5154   if (!rowb) {
5155     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5156   } else {
5157     *rowb = isrowb;
5158   }
5159   if (!colb) {
5160     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5161   } else {
5162     *colb = iscolb;
5163   }
5164   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 #undef __FUNCT__
5169 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5170 /*
5171     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5172     of the OFF-DIAGONAL portion of the local A
5173 
5174     Collective on Mat
5175 
5176    Input Parameters:
5177 +    A,B - the matrices in mpiaij format
5178 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5179 
5180    Output Parameter:
5181 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5182 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5183 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5184 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5185 
5186     Level: developer
5187 
5188 */
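/*
   A minimal usage sketch (illustrative only), following the reuse pattern of the
   matrix-matrix multiply routines: pass NULL-initialized pointers on the first call,
   keep them for MAT_REUSE_MATRIX, and free them when done.

     Mat       Both       = NULL;
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ... change the numerical values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/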
5189 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5190 {
5191   VecScatter_MPI_General *gen_to,*gen_from;
5192   PetscErrorCode         ierr;
5193   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5194   Mat_SeqAIJ             *b_oth;
5195   VecScatter             ctx =a->Mvctx;
5196   MPI_Comm               comm;
5197   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5198   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5199   PetscScalar            *rvalues,*svalues;
5200   MatScalar              *b_otha,*bufa,*bufA;
5201   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5202   MPI_Request            *rwaits = NULL,*swaits = NULL;
5203   MPI_Status             *sstatus,rstatus;
5204   PetscMPIInt            jj,size;
5205   PetscInt               *cols,sbs,rbs;
5206   PetscScalar            *vals;
5207 
5208   PetscFunctionBegin;
5209   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5210   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5211   if (size == 1) PetscFunctionReturn(0);
5212 
5213   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5214     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5215   }
5216   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5217   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5218 
5219   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5220   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5221   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
5222   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
5223   nrecvs   = gen_from->n;
5224   nsends   = gen_to->n;
5225 
5226   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5227   srow    = gen_to->indices;    /* local row index to be sent */
5228   sstarts = gen_to->starts;
5229   sprocs  = gen_to->procs;
5230   sstatus = gen_to->sstatus;
5231   sbs     = gen_to->bs;
5232   rstarts = gen_from->starts;
5233   rprocs  = gen_from->procs;
5234   rbs     = gen_from->bs;
5235 
5236   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5237   if (scall == MAT_INITIAL_MATRIX) {
5238     /* i-array */
5239     /*---------*/
5240     /*  post receives */
5241     for (i=0; i<nrecvs; i++) {
5242       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5243       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5244       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5245     }
5246 
5247     /* pack the outgoing message */
5248     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5249 
5250     sstartsj[0] = 0;
5251     rstartsj[0] = 0;
5252     len         = 0; /* total length of j or a array to be sent */
5253     k           = 0;
5254     for (i=0; i<nsends; i++) {
5255       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5256       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5257       for (j=0; j<nrows; j++) {
5258         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5259         for (l=0; l<sbs; l++) {
5260           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5261 
5262           rowlen[j*sbs+l] = ncols;
5263 
5264           len += ncols;
5265           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5266         }
5267         k++;
5268       }
5269       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5270 
5271       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5272     }
5273     /* recvs and sends of i-array are completed */
5274     i = nrecvs;
5275     while (i--) {
5276       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5277     }
5278     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5279 
5280     /* allocate buffers for sending j and a arrays */
5281     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5282     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5283 
5284     /* create i-array of B_oth */
5285     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5286 
5287     b_othi[0] = 0;
5288     len       = 0; /* total length of j or a array to be received */
5289     k         = 0;
5290     for (i=0; i<nrecvs; i++) {
5291       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5292       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5293       for (j=0; j<nrows; j++) {
5294         b_othi[k+1] = b_othi[k] + rowlen[j];
5295         len        += rowlen[j]; k++;
5296       }
5297       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5298     }
5299 
5300     /* allocate space for j and a arrays of B_oth */
5301     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5302     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5303 
5304     /* j-array */
5305     /*---------*/
5306     /*  post receives of j-array */
5307     for (i=0; i<nrecvs; i++) {
5308       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5309       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5310     }
5311 
5312     /* pack the outgoing message j-array */
5313     k = 0;
5314     for (i=0; i<nsends; i++) {
5315       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5316       bufJ  = bufj+sstartsj[i];
5317       for (j=0; j<nrows; j++) {
5318         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5319         for (ll=0; ll<sbs; ll++) {
5320           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5321           for (l=0; l<ncols; l++) {
5322             *bufJ++ = cols[l];
5323           }
5324           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5325         }
5326       }
5327       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5328     }
5329 
5330     /* recvs and sends of j-array are completed */
5331     i = nrecvs;
5332     while (i--) {
5333       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5334     }
5335     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5336   } else if (scall == MAT_REUSE_MATRIX) {
5337     sstartsj = *startsj_s;
5338     rstartsj = *startsj_r;
5339     bufa     = *bufa_ptr;
5340     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5341     b_otha   = b_oth->a;
5342   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5343 
5344   /* a-array */
5345   /*---------*/
5346   /*  post receives of a-array */
5347   for (i=0; i<nrecvs; i++) {
5348     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5349     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5350   }
5351 
5352   /* pack the outgoing message a-array */
5353   k = 0;
5354   for (i=0; i<nsends; i++) {
5355     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5356     bufA  = bufa+sstartsj[i];
5357     for (j=0; j<nrows; j++) {
5358       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5359       for (ll=0; ll<sbs; ll++) {
5360         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5361         for (l=0; l<ncols; l++) {
5362           *bufA++ = vals[l];
5363         }
5364         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5365       }
5366     }
5367     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5368   }
5369   /* recvs and sends of a-array are completed */
5370   i = nrecvs;
5371   while (i--) {
5372     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5373   }
5374   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5375   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5376 
5377   if (scall == MAT_INITIAL_MATRIX) {
5378     /* put together the new matrix */
5379     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5380 
5381     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5382     /* Since these are PETSc arrays, change flags to free them as necessary. */
5383     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5384     b_oth->free_a  = PETSC_TRUE;
5385     b_oth->free_ij = PETSC_TRUE;
5386     b_oth->nonew   = 0;
5387 
5388     ierr = PetscFree(bufj);CHKERRQ(ierr);
5389     if (!startsj_s || !bufa_ptr) {
5390       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5391       ierr = PetscFree(bufa);CHKERRQ(ierr);
5392     } else {
5393       *startsj_s = sstartsj;
5394       *startsj_r = rstartsj;
5395       *bufa_ptr  = bufa;
5396     }
5397   }
5398   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5399   PetscFunctionReturn(0);
5400 }
5401 
5402 #undef __FUNCT__
5403 #define __FUNCT__ "MatGetCommunicationStructs"
5404 /*@C
5405   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5406 
5407   Not Collective
5408 
5409   Input Parameters:
5410 . A - The matrix in mpiaij format
5411 
5412   Output Parameter:
5413 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5414 . colmap - A map from global column index to local index into lvec
5415 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5416 
5417   Level: developer
5418 
5419 @*/
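/*
   A minimal usage sketch (illustrative only): the returned objects belong to the matrix
   and must not be destroyed by the caller. Here x is assumed to be a vector compatible
   with A's column layout; the scatter gathers the off-process entries of x into lvec,
   just as MatMult() does internally.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/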
5420 #if defined(PETSC_USE_CTABLE)
5421 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5422 #else
5423 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5424 #endif
5425 {
5426   Mat_MPIAIJ *a;
5427 
5428   PetscFunctionBegin;
5429   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5430   PetscValidPointer(lvec, 2);
5431   PetscValidPointer(colmap, 3);
5432   PetscValidPointer(multScatter, 4);
5433   a = (Mat_MPIAIJ*) A->data;
5434   if (lvec) *lvec = a->lvec;
5435   if (colmap) *colmap = a->colmap;
5436   if (multScatter) *multScatter = a->Mvctx;
5437   PetscFunctionReturn(0);
5438 }
5439 
5440 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5443 
5444 #undef __FUNCT__
5445 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5446 /*
5447     Computes C = A*B as (B'*A')' since the MPIDense*MPIAIJ product A*B cannot be formed directly
5448 
5449                n                       p                          p
5450         (              )       (              )         (                  )
5451       m (      A       )  *  n (       B      )   =   m (         C        )
5452         (              )       (              )         (                  )
5453 
5454 */
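/*
   Illustrative sketch of how this kernel is reached: a user forms C = A*B with a dense A
   and an AIJ B, and PETSc dispatches to the routine below (assuming the MPIDense-MPIAIJ
   product path is the one selected for these matrix types).

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/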
5455 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5456 {
5457   PetscErrorCode ierr;
5458   Mat            At,Bt,Ct;
5459 
5460   PetscFunctionBegin;
5461   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5462   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5463   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5464   ierr = MatDestroy(&At);CHKERRQ(ierr);
5465   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5466   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5467   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5468   PetscFunctionReturn(0);
5469 }
5470 
5471 #undef __FUNCT__
5472 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5473 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5474 {
5475   PetscErrorCode ierr;
5476   PetscInt       m=A->rmap->n,n=B->cmap->n;
5477   Mat            Cmat;
5478 
5479   PetscFunctionBegin;
5480   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5481   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5482   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5483   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5484   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5485   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5486   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5487   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5488 
5489   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5490 
5491   *C = Cmat;
5492   PetscFunctionReturn(0);
5493 }
5494 
5495 /* ----------------------------------------------------------------*/
5496 #undef __FUNCT__
5497 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5498 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5499 {
5500   PetscErrorCode ierr;
5501 
5502   PetscFunctionBegin;
5503   if (scall == MAT_INITIAL_MATRIX) {
5504     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5505     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5506     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5507   }
5508   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5509   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5510   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5511   PetscFunctionReturn(0);
5512 }
5513 
5514 #if defined(PETSC_HAVE_MUMPS)
5515 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5516 #endif
5517 #if defined(PETSC_HAVE_PASTIX)
5518 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_SUPERLU_DIST)
5521 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5522 #endif
5523 #if defined(PETSC_HAVE_CLIQUE)
5524 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5525 #endif
5526 
5527 /*MC
5528    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5529 
5530    Options Database Keys:
5531 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5532 
5533   Level: beginner
5534 
5535 .seealso: MatCreateAIJ()
5536 M*/
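/*
   A minimal creation sketch (illustrative only; m, n, d_nnz, and o_nnz are
   application-provided local sizes and per-row preallocation counts):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/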
5537 
5538 #undef __FUNCT__
5539 #define __FUNCT__ "MatCreate_MPIAIJ"
5540 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5541 {
5542   Mat_MPIAIJ     *b;
5543   PetscErrorCode ierr;
5544   PetscMPIInt    size;
5545 
5546   PetscFunctionBegin;
5547   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5548 
5549   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5550   B->data       = (void*)b;
5551   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5552   B->assembled  = PETSC_FALSE;
5553   B->insertmode = NOT_SET_VALUES;
5554   b->size       = size;
5555 
5556   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5557 
5558   /* build cache for off array entries formed */
5559   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5560 
5561   b->donotstash  = PETSC_FALSE;
5562   b->colmap      = 0;
5563   b->garray      = 0;
5564   b->roworiented = PETSC_TRUE;
5565 
5566   /* stuff used for matrix vector multiply */
5567   b->lvec  = NULL;
5568   b->Mvctx = NULL;
5569 
5570   /* stuff for MatGetRow() */
5571   b->rowindices   = 0;
5572   b->rowvalues    = 0;
5573   b->getrowactive = PETSC_FALSE;
5574 
5575   /* flexible pointer used in CUSP/CUSPARSE classes */
5576   b->spptr = NULL;
5577 
5578 #if defined(PETSC_HAVE_MUMPS)
5579   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5580 #endif
5581 #if defined(PETSC_HAVE_PASTIX)
5582   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5583 #endif
5584 #if defined(PETSC_HAVE_SUPERLU_DIST)
5585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5586 #endif
5587 #if defined(PETSC_HAVE_CLIQUE)
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5589 #endif
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5604   PetscFunctionReturn(0);
5605 }
5606 
5607 #undef __FUNCT__
5608 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5609 /*@C
5610      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5611          and "off-diagonal" parts of the matrix in CSR format.
5612 
5613    Collective on MPI_Comm
5614 
5615    Input Parameters:
5616 +  comm - MPI communicator
5617 .  m - number of local rows (Cannot be PETSC_DECIDE)
5618 .  n - This value should be the same as the local size used in creating the
5619        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5620        calculated if N is given). For square matrices n is almost always m.
5621 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5622 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5623 .   i - row indices for the "diagonal" portion of the matrix
5624 .   j - column indices, local to the "diagonal" block (0 to n-1)
5625 .   a - matrix values
5626 .   oi - row indices for the "off-diagonal" portion of the matrix
5627 .   oj - column indices, given as global column indices of the MPI matrix
5628 -   oa - matrix values
5629 
5630    Output Parameter:
5631 .   mat - the matrix
5632 
5633    Level: advanced
5634 
5635    Notes:
5636        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5637        must free the arrays once the matrix has been destroyed and not before.
5638 
5639        The i and j indices are 0 based
5640 
5641        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5642 
5643        This sets local rows and cannot be used to set off-processor values.
5644 
5645        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5646        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5647        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5648        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5649        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5650        communication if it is known that only local entries will be set.
5651 
5652 .keywords: matrix, aij, compressed row, sparse, parallel
5653 
5654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5655           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5656 @*/
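/*
   A minimal usage sketch (illustrative only, shown for rank 0 of a two-process run with
   a 4x4 matrix where each process owns two rows and two columns; the arrays on rank 1
   would differ). The j indices are local to the diagonal block, the oj indices are
   global, and all arrays must remain valid until the matrix is destroyed.

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};   diagonal block entries (0,0) and (1,1)
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {2};     off-diagonal entry in row 0, global column 2
     PetscScalar oa[] = {3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/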
5657 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5658 {
5659   PetscErrorCode ierr;
5660   Mat_MPIAIJ     *maij;
5661 
5662   PetscFunctionBegin;
5663   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5664   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5665   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5666   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5667   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5668   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5669   maij = (Mat_MPIAIJ*) (*mat)->data;
5670 
5671   (*mat)->preallocated = PETSC_TRUE;
5672 
5673   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5674   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5675 
5676   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5677   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5678 
5679   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5680   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5681   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683 
5684   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5685   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5687   PetscFunctionReturn(0);
5688 }
5689 
5690 /*
5691     Special version for direct calls from Fortran
5692 */
5693 #include <petsc-private/fortranimpl.h>
5694 
5695 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5696 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5697 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5698 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5699 #endif
5700 
5701 /* Change these macros so they can be used in a void function */
5702 #undef CHKERRQ
5703 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5704 #undef SETERRQ2
5705 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5706 #undef SETERRQ3
5707 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5708 #undef SETERRQ
5709 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5710 
5711 #undef __FUNCT__
5712 #define __FUNCT__ "matsetvaluesmpiaij_"
5713 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5714 {
5715   Mat            mat  = *mmat;
5716   PetscInt       m    = *mm, n = *mn;
5717   InsertMode     addv = *maddv;
5718   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5719   PetscScalar    value;
5720   PetscErrorCode ierr;
5721 
5722   MatCheckPreallocated(mat,1);
5723   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5724 
5725 #if defined(PETSC_USE_DEBUG)
5726   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5727 #endif
5728   {
5729     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5730     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5731     PetscBool roworiented = aij->roworiented;
5732 
5733     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
5734     Mat        A                 = aij->A;
5735     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5736     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5737     MatScalar  *aa               = a->a;
5738     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5739     Mat        B                 = aij->B;
5740     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5741     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5742     MatScalar  *ba               = b->a;
5743 
5744     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5745     PetscInt  nonew = a->nonew;
5746     MatScalar *ap1,*ap2;
5747 
5748     PetscFunctionBegin;
5749     for (i=0; i<m; i++) {
5750       if (im[i] < 0) continue;
5751 #if defined(PETSC_USE_DEBUG)
5752       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5753 #endif
5754       if (im[i] >= rstart && im[i] < rend) {
5755         row      = im[i] - rstart;
5756         lastcol1 = -1;
5757         rp1      = aj + ai[row];
5758         ap1      = aa + ai[row];
5759         rmax1    = aimax[row];
5760         nrow1    = ailen[row];
5761         low1     = 0;
5762         high1    = nrow1;
5763         lastcol2 = -1;
5764         rp2      = bj + bi[row];
5765         ap2      = ba + bi[row];
5766         rmax2    = bimax[row];
5767         nrow2    = bilen[row];
5768         low2     = 0;
5769         high2    = nrow2;
5770 
5771         for (j=0; j<n; j++) {
5772           if (roworiented) value = v[i*n+j];
5773           else value = v[i+j*m];
5774           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5775           if (in[j] >= cstart && in[j] < cend) {
5776             col = in[j] - cstart;
5777             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5778           } else if (in[j] < 0) continue;
5779 #if defined(PETSC_USE_DEBUG)
5780           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5781 #endif
5782           else {
5783             if (mat->was_assembled) {
5784               if (!aij->colmap) {
5785                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5786               }
5787 #if defined(PETSC_USE_CTABLE)
5788               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5789               col--;
5790 #else
5791               col = aij->colmap[in[j]] - 1;
5792 #endif
5793               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5794                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5795                 col  =  in[j];
5796                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5797                 B     = aij->B;
5798                 b     = (Mat_SeqAIJ*)B->data;
5799                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5800                 rp2   = bj + bi[row];
5801                 ap2   = ba + bi[row];
5802                 rmax2 = bimax[row];
5803                 nrow2 = bilen[row];
5804                 low2  = 0;
5805                 high2 = nrow2;
5806                 bm    = aij->B->rmap->n;
5807                 ba    = b->a;
5808               }
5809             } else col = in[j];
5810             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5811           }
5812         }
5813       } else if (!aij->donotstash) {
5814         if (roworiented) {
5815           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5816         } else {
5817           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5818         }
5819       }
5820     }
5821   }
5822   PetscFunctionReturnVoid();
5823 }
5824 
5825