xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3059b6fa11adf3474fbbb67840327986965c3972)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. This type also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine number diagonal and off-diagonal counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine number diagonal and off-diagonal counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array but is fast to acess.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds value at (row,col) of the diagonal block,
   working directly on the caller's local variables.

   The expansion reads and writes these names from the enclosing scope: rp1/ap1 (column
   indices and values of the current row), nrow1 (current row length), rmax1, low1/high1
   (search window carried over between calls), lastcol1, t, _i, ii, N, nonew,
   ignorezeroentries, a, A, am, aa, ai, aj, aimax and ailen.

   Steps:
     1) widen the search window on the side indicated by comparing col with lastcol1, then
        bisect until at most 5 candidates remain and scan them linearly;
     2) if col is already stored, add to or overwrite its value depending on addv;
     3) otherwise skip the entry when value is 0.0 and ignorezeroentries is set, skip it when
        nonew == 1, and raise an error when nonew == -1;
     4) otherwise call MatSeqXAIJReallocateAIJ() (presumably to make room when the row is
        full - confirm against its definition), shift the later entries of the row up by one,
        store the new entry, and increment a->nz and A->nonzerostate;
     5) in every case write nrow1 back to ailen[row].

   NOTE(review): the macro defines the label a_noinsert, so it can be expanded only once per
   function; its arguments are used unparenthesized and more than once, so pass plain variables.
*/
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
/*
   MatSetValues_SeqAIJ_B_Private - inserts or adds value at (row,col) of the off-diagonal
   block, working directly on the caller's local variables.

   The expansion reads and writes these names from the enclosing scope: rp2/ap2 (column
   indices and values of the current row), nrow2 (current row length), rmax2, low2/high2
   (search window carried over between calls), lastcol2, t, _i, ii, N, nonew,
   ignorezeroentries, b, B, bm, ba, bi, bj, bimax and bilen.

   Steps:
     1) widen the search window on the side indicated by comparing col with lastcol2, then
        bisect until at most 5 candidates remain and scan them linearly;
     2) if col is already stored, add to or overwrite its value depending on addv;
     3) otherwise skip the entry when value is 0.0 and ignorezeroentries is set, skip it when
        nonew == 1, and raise an error when nonew == -1;
     4) otherwise call MatSeqXAIJReallocateAIJ() (presumably to make room when the row is
        full - confirm against its definition), shift the later entries of the row up by one,
        store the new entry, and increment b->nz and B->nonzerostate;
     5) in every case write nrow2 back to bilen[row].

   NOTE(review): the macro defines the label b_noinsert, so it can be expanded only once per
   function; its arguments are used unparenthesized and more than once, so pass plain variables.
*/
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m by n logically dense block of values into an
   MPIAIJ matrix.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global indices; negative indices are skipped
.  n,in - number of columns and their global indices; negative indices are skipped
.  v    - the values, read as v[i*n+j] when the matrix is row oriented and v[i+j*m] otherwise
-  addv - ADD_VALUES or INSERT_VALUES

   Notes:
   Locally owned rows are handled entry by entry: columns inside [cstart,cend) go to the
   diagonal block through MatSetValues_SeqAIJ_A_Private(), all others go to the off-diagonal
   block through MatSetValues_SeqAIJ_B_Private(). Both macros operate on the locals declared
   under "Some Variables required in the macro", so those names must not be changed.

   Once the matrix has been assembled, an off-diagonal global column is first translated to
   a column of B through colmap. If it is not present and B accepts new nonzeros, the matrix
   is disassembled and the global column number is used directly; if B does not accept new
   nonzeros an error is raised.

   Rows owned by another process raise an error when nooffprocentries is set; otherwise they
   are stashed (unless donotstash is set) and the matrix is marked unassembled.

   Row and column upper-bound checks are compiled only with PETSC_USE_DEBUG.
*/
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by both insertion macros */
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
              /* after assembly B is indexed by local column numbers, so translate through colmap */
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
      /* row owned by another process: stash it for communication during assembly */
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are either in INSERTMODE or ADDMODE */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselfs, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of off process part of matrix zeroing removed columns*/
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMult_MPIAIJ"
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941 
942   PetscFunctionBegin;
943   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
944   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMultAdd_MPIAIJ"
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added in yy until the next line, */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* values actually were received in the Begin() but we need to call this nop */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1012 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: symmetric diagonal block */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078 {
1079   PetscErrorCode ierr;
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081 
1082   PetscFunctionBegin;
1083   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatScale_MPIAIJ"
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatDestroy_Redundant"
1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105 {
1106   PetscErrorCode ierr;
1107   Mat_Redundant  *redund = *redundant;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   *redundant = NULL;
1112   if (redund){
1113     if (redund->matseq) { /* via MatGetSubMatrices()  */
1114       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1115       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1116       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1117       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1118     } else {
1119       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1120       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1121       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1122       for (i=0; i<redund->nrecvs; i++) {
1123         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1124         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1125       }
1126       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1127     }
1128 
1129     if (redund->psubcomm) {
1130       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(redund);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatDestroy_MPIAIJ"
1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145 #if defined(PETSC_USE_LOG)
1146   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147 #endif
1148   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1149   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1150   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1151   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1152   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1153 #if defined(PETSC_USE_CTABLE)
1154   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1155 #else
1156   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1160   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1161   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1162   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1182   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1183   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1184   PetscErrorCode ierr;
1185   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1186   int            fd;
1187   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1188   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189   PetscScalar    *column_values;
1190   PetscInt       message_count,flowcontrolcount;
1191   FILE           *file;
1192 
1193   PetscFunctionBegin;
1194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1196   nz   = A->nz + B->nz;
1197   if (!rank) {
1198     header[0] = MAT_FILE_CLASSID;
1199     header[1] = mat->rmap->N;
1200     header[2] = mat->cmap->N;
1201 
1202     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1204     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     /* get largest number of rows any processor has */
1206     rlen  = mat->rmap->n;
1207     range = mat->rmap->range;
1208     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209   } else {
1210     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211     rlen = mat->rmap->n;
1212   }
1213 
1214   /* load up the local row counts */
1215   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1216   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1217 
1218   /* store the row lengths to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       rlen = range[i+1] - range[i];
1225       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1235 
1236   /* load up the local column indices */
1237   nzmax = nz; /* th processor needs space a largest processor needs */
1238   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1240   cnt   = 0;
1241   for (i=0; i<mat->rmap->n; i++) {
1242     for (j=B->i[i]; j<B->i[i+1]; j++) {
1243       if ((col = garray[B->j[j]]) > cstart) break;
1244       column_indices[cnt++] = col;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248   }
1249   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1250 
1251   /* store the column indices to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     MPI_Status status;
1255     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1256     for (i=1; i<size; i++) {
1257       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1258       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1259       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1260       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1264   } else {
1265     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1266     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1271 
1272   /* load up the local column values */
1273   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1274   cnt  = 0;
1275   for (i=0; i<mat->rmap->n; i++) {
1276     for (j=B->i[i]; j<B->i[i+1]; j++) {
1277       if (garray[B->j[j]] > cstart) break;
1278       column_values[cnt++] = B->a[j];
1279     }
1280     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282   }
1283   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1284 
1285   /* store the column values to the file */
1286   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1287   if (!rank) {
1288     MPI_Status status;
1289     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1290     for (i=1; i<size; i++) {
1291       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1292       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscBool inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto first processor. */
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386 
1387     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1388     if (!rank) {
1389       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1390     } else {
1391       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1392     }
1393     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1394     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1395     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1396     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1397     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1398 
1399     /* copy over the A part */
1400     Aloc = (Mat_SeqAIJ*)aij->A->data;
1401     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408     }
1409     aj = Aloc->j;
1410     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1411 
1412     /* copy over the B part */
1413     Aloc = (Mat_SeqAIJ*)aij->B->data;
1414     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1415     row  = mat->rmap->rstart;
1416     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1417     ct   = cols;
1418     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423     }
1424     ierr = PetscFree(ct);CHKERRQ(ierr);
1425     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     /*
1428        Everyone has to call to draw the matrix since the graphics waits are
1429        synchronized across all processors that share the PetscDraw object
1430     */
1431     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1432     if (!rank) {
1433       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1436     ierr = MatDestroy(&A);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ"
1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1444 {
1445   PetscErrorCode ierr;
1446   PetscBool      iascii,isdraw,issocket,isbinary;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1453   if (iascii || isdraw || isbinary || issocket) {
1454     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSOR_MPIAIJ"
1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscErrorCode ierr;
1465   Vec            bb1 = 0;
1466   PetscBool      hasop;
1467 
1468   PetscFunctionBegin;
1469   if (flag == SOR_APPLY_UPPER) {
1470     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471     PetscFunctionReturn(0);
1472   }
1473 
1474   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1475     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1476   }
1477 
1478   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483 
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_EISENSTAT) {
1528     Vec xx1;
1529 
1530     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1531     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1532 
1533     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535     if (!mat->diag) {
1536       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1537       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1538     }
1539     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1540     if (hasop) {
1541       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1542     } else {
1543       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1544     }
1545     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1546 
1547     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1548 
1549     /* local sweep */
1550     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1551     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1552     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1553   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1554 
1555   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatPermute_MPIAIJ"
1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1562 {
1563   Mat            aA,aB,Aperm;
1564   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1565   PetscScalar    *aa,*ba;
1566   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1567   PetscSF        rowsf,sf;
1568   IS             parcolp = NULL;
1569   PetscBool      done;
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1574   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1575   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1576   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1577 
1578   /* Invert row permutation to find out where my rows should go */
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1582   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1583   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1585 
1586   /* Invert column permutation to find out where my columns should go */
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1591   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1594 
1595   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1596   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1597   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1598 
1599   /* Find out where my gcols should go */
1600   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1601   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1602   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1603   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1604   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1608 
1609   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1610   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   for (i=0; i<m; i++) {
1613     PetscInt row = rdest[i],rowner;
1614     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1615     for (j=ai[i]; j<ai[i+1]; j++) {
1616       PetscInt cowner,col = cdest[aj[j]];
1617       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1618       if (rowner == cowner) dnnz[i]++;
1619       else onnz[i]++;
1620     }
1621     for (j=bi[i]; j<bi[i+1]; j++) {
1622       PetscInt cowner,col = gcdest[bj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627   }
1628   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1629   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1630   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1631   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1633 
1634   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1635   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) {
1638     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1639     PetscInt j0,rowlen;
1640     rowlen = ai[i+1] - ai[i];
1641     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1642       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1643       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1644     }
1645     rowlen = bi[i+1] - bi[i];
1646     for (j0=j=0; j<rowlen; j0=j) {
1647       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1648       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1649     }
1650   }
1651   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1654   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1655   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1657   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1658   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1659   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1660   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1661   *B = Aperm;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1668 {
1669   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1670   Mat            A    = mat->A,B = mat->B;
1671   PetscErrorCode ierr;
1672   PetscReal      isend[5],irecv[5];
1673 
1674   PetscFunctionBegin;
1675   info->block_size = 1.0;
1676   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1677 
1678   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1679   isend[3] = info->memory;  isend[4] = info->mallocs;
1680 
1681   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1682 
1683   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1684   isend[3] += info->memory;  isend[4] += info->mallocs;
1685   if (flag == MAT_LOCAL) {
1686     info->nz_used      = isend[0];
1687     info->nz_allocated = isend[1];
1688     info->nz_unneeded  = isend[2];
1689     info->memory       = isend[3];
1690     info->mallocs      = isend[4];
1691   } else if (flag == MAT_GLOBAL_MAX) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   } else if (flag == MAT_GLOBAL_SUM) {
1700     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1701 
1702     info->nz_used      = irecv[0];
1703     info->nz_allocated = irecv[1];
1704     info->nz_unneeded  = irecv[2];
1705     info->memory       = irecv[3];
1706     info->mallocs      = irecv[4];
1707   }
1708   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1709   info->fill_ratio_needed = 0;
1710   info->factor_mallocs    = 0;
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatSetOption_MPIAIJ"
1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1717 {
1718   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   switch (op) {
1723   case MAT_NEW_NONZERO_LOCATIONS:
1724   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1725   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1726   case MAT_KEEP_NONZERO_PATTERN:
1727   case MAT_NEW_NONZERO_LOCATION_ERR:
1728   case MAT_USE_INODES:
1729   case MAT_IGNORE_ZERO_ENTRIES:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_ROW_ORIENTED:
1735     a->roworiented = flg;
1736 
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_NEW_DIAGONALS:
1741     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1742     break;
1743   case MAT_IGNORE_OFF_PROC_ENTRIES:
1744     a->donotstash = flg;
1745     break;
1746   case MAT_SPD:
1747     A->spd_set = PETSC_TRUE;
1748     A->spd     = flg;
1749     if (flg) {
1750       A->symmetric                  = PETSC_TRUE;
1751       A->structurally_symmetric     = PETSC_TRUE;
1752       A->symmetric_set              = PETSC_TRUE;
1753       A->structurally_symmetric_set = PETSC_TRUE;
1754     }
1755     break;
1756   case MAT_SYMMETRIC:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_STRUCTURALLY_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_HERMITIAN:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_SYMMETRY_ETERNAL:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   default:
1769     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatGetRow_MPIAIJ"
1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1777 {
1778   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1779   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1780   PetscErrorCode ierr;
1781   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1782   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1783   PetscInt       *cmap,*idx_p;
1784 
1785   PetscFunctionBegin;
1786   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1787   mat->getrowactive = PETSC_TRUE;
1788 
1789   if (!mat->rowvalues && (idx || v)) {
1790     /*
1791         allocate enough space to hold information from the longest row.
1792     */
1793     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1794     PetscInt   max = 1,tmp;
1795     for (i=0; i<matin->rmap->n; i++) {
1796       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1797       if (max < tmp) max = tmp;
1798     }
1799     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1800   }
1801 
1802   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1803   lrow = row - rstart;
1804 
1805   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1806   if (!v)   {pvA = 0; pvB = 0;}
1807   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1808   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1809   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1810   nztot = nzA + nzB;
1811 
1812   cmap = mat->garray;
1813   if (v  || idx) {
1814     if (nztot) {
1815       /* Sort by increasing column numbers, assuming A and B already sorted */
1816       PetscInt imark = -1;
1817       if (v) {
1818         *v = v_p = mat->rowvalues;
1819         for (i=0; i<nzB; i++) {
1820           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1821           else break;
1822         }
1823         imark = i;
1824         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1825         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1826       }
1827       if (idx) {
1828         *idx = idx_p = mat->rowindices;
1829         if (imark > -1) {
1830           for (i=0; i<imark; i++) {
1831             idx_p[i] = cmap[cworkB[i]];
1832           }
1833         } else {
1834           for (i=0; i<nzB; i++) {
1835             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1836             else break;
1837           }
1838           imark = i;
1839         }
1840         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1841         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1842       }
1843     } else {
1844       if (idx) *idx = 0;
1845       if (v)   *v   = 0;
1846     }
1847   }
1848   *nz  = nztot;
1849   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1850   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857 {
1858   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1859 
1860   PetscFunctionBegin;
1861   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1862   aij->getrowactive = PETSC_FALSE;
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatNorm_MPIAIJ"
1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1869 {
1870   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1871   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1872   PetscErrorCode ierr;
1873   PetscInt       i,j,cstart = mat->cmap->rstart;
1874   PetscReal      sum = 0.0;
1875   MatScalar      *v;
1876 
1877   PetscFunctionBegin;
1878   if (aij->size == 1) {
1879     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1880   } else {
1881     if (type == NORM_FROBENIUS) {
1882       v = amat->a;
1883       for (i=0; i<amat->nz; i++) {
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885       }
1886       v = bmat->a;
1887       for (i=0; i<bmat->nz; i++) {
1888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1889       }
1890       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       *norm = PetscSqrtReal(*norm);
1892     } else if (type == NORM_1) { /* max column norm */
1893       PetscReal *tmp,*tmp2;
1894       PetscInt  *jj,*garray = aij->garray;
1895       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1896       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1897       *norm = 0.0;
1898       v     = amat->a; jj = amat->j;
1899       for (j=0; j<amat->nz; j++) {
1900         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1901       }
1902       v = bmat->a; jj = bmat->j;
1903       for (j=0; j<bmat->nz; j++) {
1904         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905       }
1906       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       for (j=0; j<mat->cmap->N; j++) {
1908         if (tmp2[j] > *norm) *norm = tmp2[j];
1909       }
1910       ierr = PetscFree(tmp);CHKERRQ(ierr);
1911       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1912     } else if (type == NORM_INFINITY) { /* max row norm */
1913       PetscReal ntemp = 0.0;
1914       for (j=0; j<aij->A->rmap->n; j++) {
1915         v   = amat->a + amat->i[j];
1916         sum = 0.0;
1917         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         v = bmat->a + bmat->i[j];
1921         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922           sum += PetscAbsScalar(*v); v++;
1923         }
1924         if (sum > ntemp) ntemp = sum;
1925       }
1926       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1940   PetscInt       cstart = A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1952     PetscSFNode          *oloc;
1953     PETSC_UNUSED PetscSF sf;
1954 
1955     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1956     /* compute d_nnz for preallocation */
1957     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) {
1959       d_nnz[aj[i]]++;
1960       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1961     }
1962     /* compute local off-diagonal contributions */
1963     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1964     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1965     /* map those to global */
1966     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1967     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1968     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1969     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1970     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1971     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1972     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1973 
1974     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1975     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1976     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1977     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1978     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1979     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1980   } else {
1981     B    = *matout;
1982     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1983     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984   }
1985 
1986   /* copy over the A part */
1987   array = Aloc->a;
1988   row   = A->rmap->rstart;
1989   for (i=0; i<ma; i++) {
1990     ncol = ai[i+1]-ai[i];
1991     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; aj += ncol;
1994   }
1995   aj = Aloc->j;
1996   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1997 
1998   /* copy over the B part */
1999   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2000   array = Bloc->a;
2001   row   = A->rmap->rstart;
2002   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2003   cols_tmp = cols;
2004   for (i=0; i<mb; i++) {
2005     ncol = bi[i+1]-bi[i];
2006     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2007     row++;
2008     array += ncol; cols_tmp += ncol;
2009   }
2010   ierr = PetscFree(cols);CHKERRQ(ierr);
2011 
2012   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2015     *matout = B;
2016   } else {
2017     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2025 {
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            a    = aij->A,b = aij->B;
2028   PetscErrorCode ierr;
2029   PetscInt       s1,s2,s3;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2033   if (rr) {
2034     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2035     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2036     /* Overlap communication with computation. */
2037     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2038   }
2039   if (ll) {
2040     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2041     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2042     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2043   }
2044   /* scale  the diagonal block */
2045   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2046 
2047   if (rr) {
2048     /* Do a scatter end and then right scale the off-diagonal block */
2049     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2050     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2058 {
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatEqual_MPIAIJ"
2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2070 {
2071   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2072   Mat            a,b,c,d;
2073   PetscBool      flg;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   a = matA->A; b = matA->B;
2078   c = matB->A; d = matB->B;
2079 
2080   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2081   if (flg) {
2082     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2083   }
2084   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatCopy_MPIAIJ"
2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2095 
2096   PetscFunctionBegin;
2097   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2098   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2099     /* because of the column compression in the off-processor part of the matrix a->B,
2100        the number of columns in a->B and b->B may be different, hence we cannot call
2101        the MatCopy() directly on the two parts. If need be, we can provide a more
2102        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2103        then copying the submatrices */
2104     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2105   } else {
2106     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2107     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUp_MPIAIJ"
2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 /*
2124    Computes the number of nonzeros per row needed for preallocation when X and Y
2125    have different nonzero structure.
2126 */
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2130 {
2131   PetscInt       i,j,k,nzx,nzy;
2132 
2133   PetscFunctionBegin;
2134   /* Set the number of nonzeros in the new matrix */
2135   for (i=0; i<m; i++) {
2136     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2137     nzx = xi[i+1] - xi[i];
2138     nzy = yi[i+1] - yi[i];
2139     nnz[i] = 0;
2140     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2141       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2142       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2143       nnz[i]++;
2144     }
2145     for (; k<nzy; k++) nnz[i]++;
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2154 {
2155   PetscErrorCode ierr;
2156   PetscInt       m = Y->rmap->N;
2157   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2158   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatAXPY_MPIAIJ"
2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2168 {
2169   PetscErrorCode ierr;
2170   PetscInt       i;
2171   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2172   PetscBLASInt   bnz,one=1;
2173   Mat_SeqAIJ     *x,*y;
2174 
2175   PetscFunctionBegin;
2176   if (str == SAME_NONZERO_PATTERN) {
2177     PetscScalar alpha = a;
2178     x    = (Mat_SeqAIJ*)xx->A->data;
2179     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2180     y    = (Mat_SeqAIJ*)yy->A->data;
2181     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2182     x    = (Mat_SeqAIJ*)xx->B->data;
2183     y    = (Mat_SeqAIJ*)yy->B->data;
2184     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2185     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2186     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2187   } else if (str == SUBSET_NONZERO_PATTERN) {
2188     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2189 
2190     x = (Mat_SeqAIJ*)xx->B->data;
2191     y = (Mat_SeqAIJ*)yy->B->data;
2192     if (y->xtoy && y->XtoY != xx->B) {
2193       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2194       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2195     }
2196     if (!y->xtoy) { /* get xtoy */
2197       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2198       y->XtoY = xx->B;
2199       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2200     }
2201     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2202     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2203   } else {
2204     Mat      B;
2205     PetscInt *nnz_d,*nnz_o;
2206     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2207     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2208     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2209     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2210     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2211     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2212     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2213     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2215     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2216     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2217     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2218     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2219     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatConjugate_MPIAIJ"
2228 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2229 {
2230 #if defined(PETSC_USE_COMPLEX)
2231   PetscErrorCode ierr;
2232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2236   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2237 #else
2238   PetscFunctionBegin;
2239 #endif
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatRealPart_MPIAIJ"
2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2246 {
2247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2248   PetscErrorCode ierr;
2249 
2250   PetscFunctionBegin;
2251   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2252   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2265   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #if defined(PETSC_HAVE_PBGL)
2270 
2271 #include <boost/parallel/mpi/bsp_process_group.hpp>
2272 #include <boost/graph/distributed/ilu_default_graph.hpp>
2273 #include <boost/graph/distributed/ilu_0_block.hpp>
2274 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2275 #include <boost/graph/distributed/petsc/interface.hpp>
2276 #include <boost/multi_array.hpp>
2277 #include <boost/parallel/distributed_property_map->hpp>
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2281 /*
2282   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2283 */
2284 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2285 {
2286   namespace petsc = boost::distributed::petsc;
2287 
2288   namespace graph_dist = boost::graph::distributed;
2289   using boost::graph::distributed::ilu_default::process_group_type;
2290   using boost::graph::ilu_permuted;
2291 
2292   PetscBool      row_identity, col_identity;
2293   PetscContainer c;
2294   PetscInt       m, n, M, N;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2299   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2300   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2301   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2302 
2303   process_group_type pg;
2304   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2305   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2306   lgraph_type& level_graph = *lgraph_p;
2307   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2308 
2309   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2310   ilu_permuted(level_graph);
2311 
2312   /* put together the new matrix */
2313   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2314   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2315   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2316   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2317   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2318   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2319   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2320   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2321 
2322   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2323   ierr = PetscContainerSetPointer(c, lgraph_p);
2324   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2325   ierr = PetscContainerDestroy(&c);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2331 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2332 {
2333   PetscFunctionBegin;
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatSolve_MPIAIJ"
2339 /*
2340   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2341 */
2342 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2343 {
2344   namespace graph_dist = boost::graph::distributed;
2345 
2346   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2347   lgraph_type    *lgraph_p;
2348   PetscContainer c;
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2353   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2354   ierr = VecCopy(b, x);CHKERRQ(ierr);
2355 
2356   PetscScalar *array_x;
2357   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2358   PetscInt sx;
2359   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2360 
2361   PetscScalar *array_b;
2362   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2363   PetscInt sb;
2364   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2365 
2366   lgraph_type& level_graph = *lgraph_p;
2367   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2368 
2369   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2370   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2371   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2372 
2373   typedef boost::iterator_property_map<array_ref_type::iterator,
2374                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2375   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2376   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2377 
2378   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2379   PetscFunctionReturn(0);
2380 }
2381 #endif
2382 
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2387 {
2388   PetscMPIInt    rank,size;
2389   MPI_Comm       comm;
2390   PetscErrorCode ierr;
2391   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2392   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2393   PetscInt       *rowrange = mat->rmap->range;
2394   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2395   Mat            A = aij->A,B=aij->B,C=*matredundant;
2396   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2397   PetscScalar    *sbuf_a;
2398   PetscInt       nzlocal=a->nz+b->nz;
2399   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2400   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2401   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2402   MatScalar      *aworkA,*aworkB;
2403   PetscScalar    *vals;
2404   PetscMPIInt    tag1,tag2,tag3,imdex;
2405   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2406   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2407   MPI_Status     recv_status,*send_status;
2408   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2409   PetscInt       **rbuf_j=NULL;
2410   PetscScalar    **rbuf_a=NULL;
2411   Mat_Redundant  *redund =NULL;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2417   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2419 
2420   if (reuse == MAT_REUSE_MATRIX) {
2421     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2422     if (subsize == 1) {
2423       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2424       redund = c->redundant;
2425     } else {
2426       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2427       redund = c->redundant;
2428     }
2429     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2430 
2431     nsends    = redund->nsends;
2432     nrecvs    = redund->nrecvs;
2433     send_rank = redund->send_rank;
2434     recv_rank = redund->recv_rank;
2435     sbuf_nz   = redund->sbuf_nz;
2436     rbuf_nz   = redund->rbuf_nz;
2437     sbuf_j    = redund->sbuf_j;
2438     sbuf_a    = redund->sbuf_a;
2439     rbuf_j    = redund->rbuf_j;
2440     rbuf_a    = redund->rbuf_a;
2441   }
2442 
2443   if (reuse == MAT_INITIAL_MATRIX) {
2444     PetscInt    nleftover,np_subcomm;
2445 
2446     /* get the destination processors' id send_rank, nsends and nrecvs */
2447     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2448 
2449     np_subcomm = size/nsubcomm;
2450     nleftover  = size - nsubcomm*np_subcomm;
2451 
2452     /* block of codes below is specific for INTERLACED */
2453     /* ------------------------------------------------*/
2454     nsends = 0; nrecvs = 0;
2455     for (i=0; i<size; i++) {
2456       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2457         send_rank[nsends++] = i;
2458         recv_rank[nrecvs++] = i;
2459       }
2460     }
2461     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2462       i = size-nleftover-1;
2463       j = 0;
2464       while (j < nsubcomm - nleftover) {
2465         send_rank[nsends++] = i;
2466         i--; j++;
2467       }
2468     }
2469 
2470     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2471       for (i=0; i<nleftover; i++) {
2472         recv_rank[nrecvs++] = size-nleftover+i;
2473       }
2474     }
2475     /*----------------------------------------------*/
2476 
2477     /* allocate sbuf_j, sbuf_a */
2478     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2480     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2481     /*
2482     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2483     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2484      */
2485   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2486 
2487   /* copy mat's local entries into the buffers */
2488   if (reuse == MAT_INITIAL_MATRIX) {
2489     rownz_max = 0;
2490     rptr      = sbuf_j;
2491     cols      = sbuf_j + rend-rstart + 1;
2492     vals      = sbuf_a;
2493     rptr[0]   = 0;
2494     for (i=0; i<rend-rstart; i++) {
2495       row    = i + rstart;
2496       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2497       ncols  = nzA + nzB;
2498       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2499       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2500       /* load the column indices for this row into cols */
2501       lwrite = 0;
2502       for (l=0; l<nzB; l++) {
2503         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2504           vals[lwrite]   = aworkB[l];
2505           cols[lwrite++] = ctmp;
2506         }
2507       }
2508       for (l=0; l<nzA; l++) {
2509         vals[lwrite]   = aworkA[l];
2510         cols[lwrite++] = cstart + cworkA[l];
2511       }
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       vals     += ncols;
2519       cols     += ncols;
2520       rptr[i+1] = rptr[i] + ncols;
2521       if (rownz_max < ncols) rownz_max = ncols;
2522     }
2523     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2524   } else { /* only copy matrix values into sbuf_a */
2525     rptr    = sbuf_j;
2526     vals    = sbuf_a;
2527     rptr[0] = 0;
2528     for (i=0; i<rend-rstart; i++) {
2529       row    = i + rstart;
2530       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2531       ncols  = nzA + nzB;
2532       cworkB = b->j + b->i[i];
2533       aworkA = a->a + a->i[i];
2534       aworkB = b->a + b->i[i];
2535       lwrite = 0;
2536       for (l=0; l<nzB; l++) {
2537         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2538       }
2539       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2540       for (l=0; l<nzB; l++) {
2541         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2542       }
2543       vals     += ncols;
2544       rptr[i+1] = rptr[i] + ncols;
2545     }
2546   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2547 
2548   /* send nzlocal to others, and recv other's nzlocal */
2549   /*--------------------------------------------------*/
2550   if (reuse == MAT_INITIAL_MATRIX) {
2551     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2552 
2553     s_waits2 = s_waits3 + nsends;
2554     s_waits1 = s_waits2 + nsends;
2555     r_waits1 = s_waits1 + nsends;
2556     r_waits2 = r_waits1 + nrecvs;
2557     r_waits3 = r_waits2 + nrecvs;
2558   } else {
2559     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2560 
2561     r_waits3 = s_waits3 + nsends;
2562   }
2563 
2564   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2565   if (reuse == MAT_INITIAL_MATRIX) {
2566     /* get new tags to keep the communication clean */
2567     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2568     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2569     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2570 
2571     /* post receives of other's nzlocal */
2572     for (i=0; i<nrecvs; i++) {
2573       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* send nzlocal to others */
2576     for (i=0; i<nsends; i++) {
2577       sbuf_nz[i] = nzlocal;
2578       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2579     }
2580     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2581     count = nrecvs;
2582     while (count) {
2583       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2584 
2585       recv_rank[imdex] = recv_status.MPI_SOURCE;
2586       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2587       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2588 
2589       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2590 
2591       rbuf_nz[imdex] += i + 2;
2592 
2593       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2594       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2595       count--;
2596     }
2597     /* wait on sends of nzlocal */
2598     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2599     /* send mat->i,j to others, and recv from other's */
2600     /*------------------------------------------------*/
2601     for (i=0; i<nsends; i++) {
2602       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2603       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2604     }
2605     /* wait on receives of mat->i,j */
2606     /*------------------------------*/
2607     count = nrecvs;
2608     while (count) {
2609       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2610       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2611       count--;
2612     }
2613     /* wait on sends of mat->i,j */
2614     /*---------------------------*/
2615     if (nsends) {
2616       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2617     }
2618   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2619 
2620   /* post receives, send and receive mat->a */
2621   /*----------------------------------------*/
2622   for (imdex=0; imdex<nrecvs; imdex++) {
2623     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2624   }
2625   for (i=0; i<nsends; i++) {
2626     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2627   }
2628   count = nrecvs;
2629   while (count) {
2630     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2631     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2632     count--;
2633   }
2634   if (nsends) {
2635     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2636   }
2637 
2638   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2639 
2640   /* create redundant matrix */
2641   /*-------------------------*/
2642   if (reuse == MAT_INITIAL_MATRIX) {
2643     const PetscInt *range;
2644     PetscInt       rstart_sub,rend_sub,mloc_sub;
2645 
2646     /* compute rownz_max for preallocation */
2647     for (imdex=0; imdex<nrecvs; imdex++) {
2648       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2649       rptr = rbuf_j[imdex];
2650       for (i=0; i<j; i++) {
2651         ncols = rptr[i+1] - rptr[i];
2652         if (rownz_max < ncols) rownz_max = ncols;
2653       }
2654     }
2655 
2656     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2657 
2658     /* get local size of redundant matrix
2659        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2660     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2661     rstart_sub = range[nsubcomm*subrank];
2662     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2663       rend_sub = range[nsubcomm*(subrank+1)];
2664     } else {
2665       rend_sub = mat->rmap->N;
2666     }
2667     mloc_sub = rend_sub - rstart_sub;
2668 
2669     if (M == N) {
2670       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2671     } else { /* non-square matrix */
2672       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2673     }
2674     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2675     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2676     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2677     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2678   } else {
2679     C = *matredundant;
2680   }
2681 
2682   /* insert local matrix entries */
2683   rptr = sbuf_j;
2684   cols = sbuf_j + rend-rstart + 1;
2685   vals = sbuf_a;
2686   for (i=0; i<rend-rstart; i++) {
2687     row   = i + rstart;
2688     ncols = rptr[i+1] - rptr[i];
2689     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2690     vals += ncols;
2691     cols += ncols;
2692   }
2693   /* insert received matrix entries */
2694   for (imdex=0; imdex<nrecvs; imdex++) {
2695     rstart = rowrange[recv_rank[imdex]];
2696     rend   = rowrange[recv_rank[imdex]+1];
2697     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2698     rptr   = rbuf_j[imdex];
2699     cols   = rbuf_j[imdex] + rend-rstart + 1;
2700     vals   = rbuf_a[imdex];
2701     for (i=0; i<rend-rstart; i++) {
2702       row   = i + rstart;
2703       ncols = rptr[i+1] - rptr[i];
2704       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705       vals += ncols;
2706       cols += ncols;
2707     }
2708   }
2709   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711 
2712   if (reuse == MAT_INITIAL_MATRIX) {
2713     *matredundant = C;
2714 
2715     /* create a supporting struct and attach it to C for reuse */
2716     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2717     if (subsize == 1) {
2718       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2719       c->redundant = redund;
2720     } else {
2721       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2722       c->redundant = redund;
2723     }
2724 
2725     redund->nzlocal   = nzlocal;
2726     redund->nsends    = nsends;
2727     redund->nrecvs    = nrecvs;
2728     redund->send_rank = send_rank;
2729     redund->recv_rank = recv_rank;
2730     redund->sbuf_nz   = sbuf_nz;
2731     redund->rbuf_nz   = rbuf_nz;
2732     redund->sbuf_j    = sbuf_j;
2733     redund->sbuf_a    = sbuf_a;
2734     redund->rbuf_j    = rbuf_j;
2735     redund->rbuf_a    = rbuf_a;
2736     redund->psubcomm  = NULL;
2737   }
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2743 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2744 {
2745   PetscErrorCode ierr;
2746   MPI_Comm       comm;
2747   PetscMPIInt    size,subsize;
2748   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2749   Mat_Redundant  *redund=NULL;
2750   PetscSubcomm   psubcomm=NULL;
2751   MPI_Comm       subcomm_in=subcomm;
2752   Mat            *matseq;
2753   IS             isrow,iscol;
2754 
2755   PetscFunctionBegin;
2756   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2757     if (reuse ==  MAT_INITIAL_MATRIX) {
2758       /* create psubcomm, then get subcomm */
2759       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2760       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2761       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2762 
2763       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2764       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2765       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2766       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2767       subcomm = psubcomm->comm;
2768     } else { /* retrieve psubcomm and subcomm */
2769       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2770       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2771       if (subsize == 1) {
2772         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2773         redund = c->redundant;
2774       } else {
2775         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2776         redund = c->redundant;
2777       }
2778       psubcomm = redund->psubcomm;
2779     }
2780     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2781       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2782       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2783         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2784         if (subsize == 1) {
2785           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2786           c->redundant->psubcomm = psubcomm;
2787         } else {
2788           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2789           c->redundant->psubcomm = psubcomm ;
2790         }
2791       }
2792       PetscFunctionReturn(0);
2793     }
2794   }
2795 
2796   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2797   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2798   if (reuse == MAT_INITIAL_MATRIX) {
2799     /* create a local sequential matrix matseq[0] */
2800     mloc_sub = PETSC_DECIDE;
2801     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2802     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2803     rstart = rend - mloc_sub;
2804     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2805     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2806   } else { /* reuse == MAT_REUSE_MATRIX */
2807     if (subsize == 1) {
2808       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2809       redund = c->redundant;
2810     } else {
2811       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2812       redund = c->redundant;
2813     }
2814 
2815     isrow  = redund->isrow;
2816     iscol  = redund->iscol;
2817     matseq = redund->matseq;
2818   }
2819   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2820   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2821 
2822   if (reuse == MAT_INITIAL_MATRIX) {
2823     /* create a supporting struct and attach it to C for reuse */
2824     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2825     if (subsize == 1) {
2826       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2827       c->redundant = redund;
2828     } else {
2829       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2830       c->redundant = redund;
2831     }
2832     redund->isrow    = isrow;
2833     redund->iscol    = iscol;
2834     redund->matseq   = matseq;
2835     redund->psubcomm = psubcomm;
2836   }
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843 {
2844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2845   PetscErrorCode ierr;
2846   PetscInt       i,*idxb = 0;
2847   PetscScalar    *va,*vb;
2848   Vec            vtmp;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2852   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2853   if (idx) {
2854     for (i=0; i<A->rmap->n; i++) {
2855       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856     }
2857   }
2858 
2859   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2860   if (idx) {
2861     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2862   }
2863   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2864   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2865 
2866   for (i=0; i<A->rmap->n; i++) {
2867     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2868       va[i] = vb[i];
2869       if (idx) idx[i] = a->garray[idxb[i]];
2870     }
2871   }
2872 
2873   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2874   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2875   ierr = PetscFree(idxb);CHKERRQ(ierr);
2876   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883 {
2884   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2885   PetscErrorCode ierr;
2886   PetscInt       i,*idxb = 0;
2887   PetscScalar    *va,*vb;
2888   Vec            vtmp;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2892   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2893   if (idx) {
2894     for (i=0; i<A->cmap->n; i++) {
2895       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2896     }
2897   }
2898 
2899   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2900   if (idx) {
2901     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2902   }
2903   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2904   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2905 
2906   for (i=0; i<A->rmap->n; i++) {
2907     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2908       va[i] = vb[i];
2909       if (idx) idx[i] = a->garray[idxb[i]];
2910     }
2911   }
2912 
2913   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2914   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2915   ierr = PetscFree(idxb);CHKERRQ(ierr);
2916   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2923 {
2924   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2925   PetscInt       n      = A->rmap->n;
2926   PetscInt       cstart = A->cmap->rstart;
2927   PetscInt       *cmap  = mat->garray;
2928   PetscInt       *diagIdx, *offdiagIdx;
2929   Vec            diagV, offdiagV;
2930   PetscScalar    *a, *diagA, *offdiagA;
2931   PetscInt       r;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
2935   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2936   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2938   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2940   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   for (r = 0; r < n; ++r) {
2944     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2945       a[r]   = diagA[r];
2946       idx[r] = cstart + diagIdx[r];
2947     } else {
2948       a[r]   = offdiagA[r];
2949       idx[r] = cmap[offdiagIdx[r]];
2950     }
2951   }
2952   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2953   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2955   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2956   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2957   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964 {
2965   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2966   PetscInt       n      = A->rmap->n;
2967   PetscInt       cstart = A->cmap->rstart;
2968   PetscInt       *cmap  = mat->garray;
2969   PetscInt       *diagIdx, *offdiagIdx;
2970   Vec            diagV, offdiagV;
2971   PetscScalar    *a, *diagA, *offdiagA;
2972   PetscInt       r;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2977   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2979   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2981   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   for (r = 0; r < n; ++r) {
2985     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2986       a[r]   = diagA[r];
2987       idx[r] = cstart + diagIdx[r];
2988     } else {
2989       a[r]   = offdiagA[r];
2990       idx[r] = cmap[offdiagIdx[r]];
2991     }
2992   }
2993   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2997   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2998   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3005 {
3006   PetscErrorCode ierr;
3007   Mat            *dummy;
3008 
3009   PetscFunctionBegin;
3010   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3011   *newmat = *dummy;
3012   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3018 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3019 {
3020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3030 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3031 {
3032   PetscErrorCode ierr;
3033   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3034 
3035   PetscFunctionBegin;
3036   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3037   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3038   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - function table for the MPIAIJ implementation.

   This is a positional initializer of struct _MatOps: each entry fills the next
   slot of the struct in declaration order, so entries must never be inserted,
   removed or reordered without matching the struct definition.  A 0 leaves the slot
   empty.  The numeric comments give the index of the slot they precede and are only
   a counting aid.

   The slot following MatMultTransposeAdd_MPIAIJ is MatSolve_MPIAIJ only when
   PETSC_HAVE_PBGL is defined; the two other PETSC_HAVE_PBGL conditionals yield 0 on
   both branches.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};
3200 
3201 /* ----------------------------------------------------------------------------------------*/
3202 
3203 #undef __FUNCT__
3204 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3205 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3206 {
3207   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3208   PetscErrorCode ierr;
3209 
3210   PetscFunctionBegin;
3211   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3212   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3218 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3219 {
3220   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3225   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3231 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3232 {
3233   Mat_MPIAIJ     *b;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3238   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3239   b = (Mat_MPIAIJ*)B->data;
3240 
3241   if (!B->preallocated) {
3242     /* Explicitly create 2 MATSEQAIJ matrices. */
3243     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3244     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3245     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3246     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3247     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3248     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3249     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3250     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3251     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3252     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3253   }
3254 
3255   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3256   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3257   B->preallocated = PETSC_TRUE;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3264 {
3265   Mat            mat;
3266   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3267   PetscErrorCode ierr;
3268 
3269   PetscFunctionBegin;
3270   *newmat = 0;
3271   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3272   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3273   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3274   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3275   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3276   a       = (Mat_MPIAIJ*)mat->data;
3277 
3278   mat->factortype   = matin->factortype;
3279   mat->assembled    = PETSC_TRUE;
3280   mat->insertmode   = NOT_SET_VALUES;
3281   mat->preallocated = PETSC_TRUE;
3282 
3283   a->size         = oldmat->size;
3284   a->rank         = oldmat->rank;
3285   a->donotstash   = oldmat->donotstash;
3286   a->roworiented  = oldmat->roworiented;
3287   a->rowindices   = 0;
3288   a->rowvalues    = 0;
3289   a->getrowactive = PETSC_FALSE;
3290 
3291   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3292   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3293 
3294   if (oldmat->colmap) {
3295 #if defined(PETSC_USE_CTABLE)
3296     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3297 #else
3298     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3299     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301 #endif
3302   } else a->colmap = 0;
3303   if (oldmat->garray) {
3304     PetscInt len;
3305     len  = oldmat->B->cmap->n;
3306     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3307     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3308     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3309   } else a->garray = 0;
3310 
3311   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3312   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3313   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3314   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3315   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3316   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3317   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3318   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3319   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3320   *newmat = mat;
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 
3325 
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatLoad_MPIAIJ"
3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329 {
3330   PetscScalar    *vals,*svals;
3331   MPI_Comm       comm;
3332   PetscErrorCode ierr;
3333   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3334   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3336   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3337   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3338   int            fd;
3339   PetscInt       bs = newMat->rmap->bs;
3340 
3341   PetscFunctionBegin;
3342   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3343   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3344   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3345   if (!rank) {
3346     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3347     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3348     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3349   }
3350 
3351   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3352   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3353   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3354   if (bs < 0) bs = 1;
3355 
3356   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3357 
3358   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3359   M    = header[1]; N = header[2];
3360   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3361   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3362   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3363 
3364   /* If global sizes are set, check if they are consistent with that given in the file */
3365   if (sizesset) {
3366     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3367   }
3368   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3369   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3370 
3371   /* determine ownership of all (block) rows */
3372   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3373   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3374   else m = newMat->rmap->n; /* Set by user */
3375 
3376   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3377   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3378 
3379   /* First process needs enough room for process with most rows */
3380   if (!rank) {
3381     mmax = rowners[1];
3382     for (i=2; i<=size; i++) {
3383       mmax = PetscMax(mmax, rowners[i]);
3384     }
3385   } else mmax = -1;             /* unused, but compilers complain */
3386 
3387   rowners[0] = 0;
3388   for (i=2; i<=size; i++) {
3389     rowners[i] += rowners[i-1];
3390   }
3391   rstart = rowners[rank];
3392   rend   = rowners[rank+1];
3393 
3394   /* distribute row lengths to all processors */
3395   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3396   if (!rank) {
3397     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3398     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3399     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3400     for (j=0; j<m; j++) {
3401       procsnz[0] += ourlens[j];
3402     }
3403     for (i=1; i<size; i++) {
3404       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3405       /* calculate the number of nonzeros on each processor */
3406       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3407         procsnz[i] += rowlengths[j];
3408       }
3409       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3410     }
3411     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412   } else {
3413     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3414   }
3415 
3416   if (!rank) {
3417     /* determine max buffer needed and allocate it */
3418     maxnz = 0;
3419     for (i=0; i<size; i++) {
3420       maxnz = PetscMax(maxnz,procsnz[i]);
3421     }
3422     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3423 
3424     /* read in my part of the matrix column indices  */
3425     nz   = procsnz[0];
3426     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3428 
3429     /* read in every one elses and ship off */
3430     for (i=1; i<size; i++) {
3431       nz   = procsnz[i];
3432       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3433       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3434     }
3435     ierr = PetscFree(cols);CHKERRQ(ierr);
3436   } else {
3437     /* determine buffer space needed for message */
3438     nz = 0;
3439     for (i=0; i<m; i++) {
3440       nz += ourlens[i];
3441     }
3442     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3443 
3444     /* receive message of column indices*/
3445     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446   }
3447 
3448   /* determine column ownership if matrix is not square */
3449   if (N != M) {
3450     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3451     else n = newMat->cmap->n;
3452     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3453     cstart = cend - n;
3454   } else {
3455     cstart = rstart;
3456     cend   = rend;
3457     n      = cend - cstart;
3458   }
3459 
3460   /* loop over local rows, determining number of off diagonal entries */
3461   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3462   jj   = 0;
3463   for (i=0; i<m; i++) {
3464     for (j=0; j<ourlens[i]; j++) {
3465       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3466       jj++;
3467     }
3468   }
3469 
3470   for (i=0; i<m; i++) {
3471     ourlens[i] -= offlens[i];
3472   }
3473   if (!sizesset) {
3474     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3475   }
3476 
3477   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3478 
3479   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3480 
3481   for (i=0; i<m; i++) {
3482     ourlens[i] += offlens[i];
3483   }
3484 
3485   if (!rank) {
3486     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3487 
3488     /* read in my part of the matrix numerical values  */
3489     nz   = procsnz[0];
3490     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491 
3492     /* insert into matrix */
3493     jj      = rstart;
3494     smycols = mycols;
3495     svals   = vals;
3496     for (i=0; i<m; i++) {
3497       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3498       smycols += ourlens[i];
3499       svals   += ourlens[i];
3500       jj++;
3501     }
3502 
3503     /* read in other processors and ship out */
3504     for (i=1; i<size; i++) {
3505       nz   = procsnz[i];
3506       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3510   } else {
3511     /* receive numeric values */
3512     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3513 
3514     /* receive message of values*/
3515     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3516 
3517     /* insert into matrix */
3518     jj      = rstart;
3519     smycols = mycols;
3520     svals   = vals;
3521     for (i=0; i<m; i++) {
3522       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3523       smycols += ourlens[i];
3524       svals   += ourlens[i];
3525       jj++;
3526     }
3527   }
3528   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3529   ierr = PetscFree(vals);CHKERRQ(ierr);
3530   ierr = PetscFree(mycols);CHKERRQ(ierr);
3531   ierr = PetscFree(rowners);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3540 {
3541   PetscErrorCode ierr;
3542   IS             iscol_local;
3543   PetscInt       csize;
3544 
3545   PetscFunctionBegin;
3546   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3547   if (call == MAT_REUSE_MATRIX) {
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3549     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3550   } else {
3551     PetscInt cbs;
3552     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3553     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3554     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3555   }
3556   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3557   if (call == MAT_INITIAL_MATRIX) {
3558     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3559     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3560   }
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3567 /*
3568     Not great since it makes two copies of the submatrix, first an SeqAIJ
3569   in local and then by concatenating the local matrices the end result.
3570   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3571 
3572   Note: This requires a sequential iscol with all indices.
3573 */
3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3575 {
3576   PetscErrorCode ierr;
3577   PetscMPIInt    rank,size;
3578   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3579   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3580   PetscBool      allcolumns, colflag;
3581   Mat            M,Mreuse;
3582   MatScalar      *vwork,*aa;
3583   MPI_Comm       comm;
3584   Mat_SeqAIJ     *aij;
3585 
3586   PetscFunctionBegin;
3587   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3590 
3591   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3592   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3593   if (colflag && ncol == mat->cmap->N) {
3594     allcolumns = PETSC_TRUE;
3595   } else {
3596     allcolumns = PETSC_FALSE;
3597   }
3598   if (call ==  MAT_REUSE_MATRIX) {
3599     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3600     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3601     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3602   } else {
3603     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3604   }
3605 
3606   /*
3607       m - number of local rows
3608       n - number of columns (same on all processors)
3609       rstart - first row in new global matrix generated
3610   */
3611   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3612   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3613   if (call == MAT_INITIAL_MATRIX) {
3614     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3615     ii  = aij->i;
3616     jj  = aij->j;
3617 
3618     /*
3619         Determine the number of non-zeros in the diagonal and off-diagonal
3620         portions of the matrix in order to do correct preallocation
3621     */
3622 
3623     /* first get start and end of "diagonal" columns */
3624     if (csize == PETSC_DECIDE) {
3625       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3626       if (mglobal == n) { /* square matrix */
3627         nlocal = m;
3628       } else {
3629         nlocal = n/size + ((n % size) > rank);
3630       }
3631     } else {
3632       nlocal = csize;
3633     }
3634     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3635     rstart = rend - nlocal;
3636     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3637 
3638     /* next, compute all the lengths */
3639     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3640     olens = dlens + m;
3641     for (i=0; i<m; i++) {
3642       jend = ii[i+1] - ii[i];
3643       olen = 0;
3644       dlen = 0;
3645       for (j=0; j<jend; j++) {
3646         if (*jj < rstart || *jj >= rend) olen++;
3647         else dlen++;
3648         jj++;
3649       }
3650       olens[i] = olen;
3651       dlens[i] = dlen;
3652     }
3653     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3654     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3655     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3656     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3657     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3658     ierr = PetscFree(dlens);CHKERRQ(ierr);
3659   } else {
3660     PetscInt ml,nl;
3661 
3662     M    = *newmat;
3663     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3664     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3665     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3666     /*
3667          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3668        rather than the slower MatSetValues().
3669     */
3670     M->was_assembled = PETSC_TRUE;
3671     M->assembled     = PETSC_FALSE;
3672   }
3673   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3674   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3675   ii   = aij->i;
3676   jj   = aij->j;
3677   aa   = aij->a;
3678   for (i=0; i<m; i++) {
3679     row   = rstart + i;
3680     nz    = ii[i+1] - ii[i];
3681     cwork = jj;     jj += nz;
3682     vwork = aa;     aa += nz;
3683     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3684   }
3685 
3686   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   *newmat = M;
3689 
3690   /* save submatrix used in processor for next request */
3691   if (call ==  MAT_INITIAL_MATRIX) {
3692     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3693     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3694   }
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3700 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3701 {
3702   PetscInt       m,cstart, cend,j,nnz,i,d;
3703   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3704   const PetscInt *JJ;
3705   PetscScalar    *values;
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3710 
3711   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3712   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3713   m      = B->rmap->n;
3714   cstart = B->cmap->rstart;
3715   cend   = B->cmap->rend;
3716   rstart = B->rmap->rstart;
3717 
3718   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3719 
3720 #if defined(PETSC_USE_DEBUGGING)
3721   for (i=0; i<m; i++) {
3722     nnz = Ii[i+1]- Ii[i];
3723     JJ  = J + Ii[i];
3724     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3725     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3726     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3727   }
3728 #endif
3729 
3730   for (i=0; i<m; i++) {
3731     nnz     = Ii[i+1]- Ii[i];
3732     JJ      = J + Ii[i];
3733     nnz_max = PetscMax(nnz_max,nnz);
3734     d       = 0;
3735     for (j=0; j<nnz; j++) {
3736       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3737     }
3738     d_nnz[i] = d;
3739     o_nnz[i] = nnz - d;
3740   }
3741   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3742   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3743 
3744   if (v) values = (PetscScalar*)v;
3745   else {
3746     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3747   }
3748 
3749   for (i=0; i<m; i++) {
3750     ii   = i + rstart;
3751     nnz  = Ii[i+1]- Ii[i];
3752     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3753   }
3754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756 
3757   if (!v) {
3758     ierr = PetscFree(values);CHKERRQ(ierr);
3759   }
3760   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3766 /*@
3767    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3768    (the default parallel PETSc format).
3769 
3770    Collective on MPI_Comm
3771 
3772    Input Parameters:
3773 +  B - the matrix
3774 .  i - the indices into j for the start of each local row (starts with zero)
3775 .  j - the column indices for each local row (starts with zero)
3776 -  v - optional values in the matrix
3777 
3778    Level: developer
3779 
3780    Notes:
3781        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3782      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3783      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3784 
3785        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3786 
3787        The format which is used for the sparse matrix input, is equivalent to a
3788     row-major ordering, i.e. for the following matrix, the input data expected is
3789     as shown:
3790 
3791         1 0 0
3792         2 0 3     P0
3793        -------
3794         4 5 6     P1
3795 
3796      Process0 [P0]: rows_owned=[0,1]
3797         i =  {0,1,3}  [size = nrow+1  = 2+1]
3798         j =  {0,0,2}  [size = local nz = 3]
3799         v =  {1,2,3}  [size = local nz = 3]
3800 
3801      Process1 [P1]: rows_owned=[2]
3802         i =  {0,3}    [size = nrow+1  = 1+1]
3803         j =  {0,1,2}  [size = local nz = 3]
3804         v =  {4,5,6}  [size = local nz = 3]
3805 
3806 .keywords: matrix, aij, compressed row, sparse, parallel
3807 
3808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3809           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3810 @*/
3811 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3812 {
3813   PetscErrorCode ierr;
3814 
3815   PetscFunctionBegin;
3816   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 #undef __FUNCT__
3821 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3822 /*@C
3823    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3824    (the default parallel PETSc format).  For good matrix assembly performance
3825    the user should preallocate the matrix storage by setting the parameters
3826    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3827    performance can be increased by more than a factor of 50.
3828 
3829    Collective on MPI_Comm
3830 
3831    Input Parameters:
3832 +  B - the matrix
3833 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3834            (same value is used for all local rows)
3835 .  d_nnz - array containing the number of nonzeros in the various rows of the
3836            DIAGONAL portion of the local submatrix (possibly different for each row)
3837            or NULL, if d_nz is used to specify the nonzero structure.
3838            The size of this array is equal to the number of local rows, i.e 'm'.
3839            For matrices that will be factored, you must leave room for (and set)
3840            the diagonal entry even if it is zero.
3841 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3842            submatrix (same value is used for all local rows).
3843 -  o_nnz - array containing the number of nonzeros in the various rows of the
3844            OFF-DIAGONAL portion of the local submatrix (possibly different for
3845            each row) or NULL, if o_nz is used to specify the nonzero
3846            structure. The size of this array is equal to the number
3847            of local rows, i.e 'm'.
3848 
3849    If the *_nnz parameter is given then the *_nz parameter is ignored
3850 
3851    The AIJ format (also called the Yale sparse matrix format or
3852    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3853    storage.  The stored row and column indices begin with zero.
3854    See Users-Manual: ch_mat for details.
3855 
3856    The parallel matrix is partitioned such that the first m0 rows belong to
3857    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3858    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3859 
3860    The DIAGONAL portion of the local submatrix of a processor can be defined
3861    as the submatrix which is obtained by extracting the part corresponding to
3862    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3863    first row that belongs to the processor, r2 is the last row belonging to
3864    this processor, and c1-c2 is the range of indices of the local part of a
3865    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3866    common case of a square matrix, the row and column ranges are the same and
3867    the DIAGONAL part is also square. The remaining portion of the local
3868    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3869 
3870    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3871 
3872    You can call MatGetInfo() to get information on how effective the preallocation was;
3873    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3874    You can also run with the option -info and look for messages with the string
3875    malloc in them to see if additional memory allocation was needed.
3876 
3877    Example usage:
3878 
3879    Consider the following 8x8 matrix with 34 non-zero values, that is
3880    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3881    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3882    as follows:
3883 
3884 .vb
3885             1  2  0  |  0  3  0  |  0  4
3886     Proc0   0  5  6  |  7  0  0  |  8  0
3887             9  0 10  | 11  0  0  | 12  0
3888     -------------------------------------
3889            13  0 14  | 15 16 17  |  0  0
3890     Proc1   0 18  0  | 19 20 21  |  0  0
3891             0  0  0  | 22 23  0  | 24  0
3892     -------------------------------------
3893     Proc2  25 26 27  |  0  0 28  | 29  0
3894            30  0  0  | 31 32 33  |  0 34
3895 .ve
3896 
3897    This can be represented as a collection of submatrices as:
3898 
3899 .vb
3900       A B C
3901       D E F
3902       G H I
3903 .ve
3904 
3905    Where the submatrices A,B,C are owned by proc0, D,E,F are
3906    owned by proc1, G,H,I are owned by proc2.
3907 
3908    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3909    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3910    The 'M','N' parameters are 8,8, and have the same values on all procs.
3911 
3912    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3913    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3914    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3915    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3916    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3917    matrix, and [DF] as another SeqAIJ matrix.
3918 
3919    When d_nz, o_nz parameters are specified, d_nz storage elements are
3920    allocated for every row of the local diagonal submatrix, and o_nz
3921    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3922    One way to choose d_nz and o_nz is to use the max nonzeros per local
3923    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3924    In this case, the values of d_nz,o_nz are:
3925 .vb
3926      proc0 : dnz = 2, o_nz = 2
3927      proc1 : dnz = 3, o_nz = 2
3928      proc2 : dnz = 1, o_nz = 4
3929 .ve
3930    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3931    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3932    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3933    34 values.
3934 
3935    When d_nnz, o_nnz parameters are specified, the storage is specified
3936    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3937    In the above case the values for d_nnz,o_nnz are:
3938 .vb
3939      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3940      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3941      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3942 .ve
3943    Here the space allocated is sum of all the above values i.e 34, and
3944    hence pre-allocation is perfect.
3945 
3946    Level: intermediate
3947 
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949 
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3952 @*/
3953 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3954 {
3955   PetscErrorCode ierr;
3956 
3957   PetscFunctionBegin;
3958   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3959   PetscValidType(B,1);
3960   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 #undef __FUNCT__
3965 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3966 /*@
3967      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3968          CSR format the local rows.
3969 
3970    Collective on MPI_Comm
3971 
3972    Input Parameters:
3973 +  comm - MPI communicator
3974 .  m - number of local rows (Cannot be PETSC_DECIDE)
3975 .  n - This value should be the same as the local size used in creating the
3976        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3977        calculated if N is given) For square matrices n is almost always m.
3978 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3979 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3980 .   i - row indices
3981 .   j - column indices
3982 -   a - matrix values
3983 
3984    Output Parameter:
3985 .   mat - the matrix
3986 
3987    Level: intermediate
3988 
3989    Notes:
3990        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3991      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3992      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3993 
3994        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3995 
3996        The format which is used for the sparse matrix input, is equivalent to a
3997     row-major ordering, i.e. for the following matrix, the input data expected is
3998     as shown:
3999 
4000         1 0 0
4001         2 0 3     P0
4002        -------
4003         4 5 6     P1
4004 
4005      Process0 [P0]: rows_owned=[0,1]
4006         i =  {0,1,3}  [size = nrow+1  = 2+1]
4007         j =  {0,0,2}  [size = local nz = 3]
4008         a =  {1,2,3}  [size = local nz = 3]
4009 
4010      Process1 [P1]: rows_owned=[2]
4011         i =  {0,3}    [size = nrow+1  = 1+1]
4012         j =  {0,1,2}  [size = local nz = 3]
4013         a =  {4,5,6}  [size = local nz = 3]
4014 
4015 .keywords: matrix, aij, compressed row, sparse, parallel
4016 
4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4018           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4019 @*/
4020 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4021 {
4022   PetscErrorCode ierr;
4023 
4024   PetscFunctionBegin;
4025   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4026   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4027   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4028   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4029   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4030   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4031   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateAIJ"
4037 /*@C
4038    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4039    (the default parallel PETSc format).  For good matrix assembly performance
4040    the user should preallocate the matrix storage by setting the parameters
4041    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4042    performance can be increased by more than a factor of 50.
4043 
4044    Collective on MPI_Comm
4045 
4046    Input Parameters:
4047 +  comm - MPI communicator
4048 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4049            This value should be the same as the local size used in creating the
4050            y vector for the matrix-vector product y = Ax.
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4057            (same value is used for all local rows)
4058 .  d_nnz - array containing the number of nonzeros in the various rows of the
4059            DIAGONAL portion of the local submatrix (possibly different for each row)
4060            or NULL, if d_nz is used to specify the nonzero structure.
4061            The size of this array is equal to the number of local rows, i.e 'm'.
4062 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4063            submatrix (same value is used for all local rows).
4064 -  o_nnz - array containing the number of nonzeros in the various rows of the
4065            OFF-DIAGONAL portion of the local submatrix (possibly different for
4066            each row) or NULL, if o_nz is used to specify the nonzero
4067            structure. The size of this array is equal to the number
4068            of local rows, i.e 'm'.
4069 
4070    Output Parameter:
4071 .  A - the matrix
4072 
4073    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4074    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4075    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4076 
4077    Notes:
4078    If the *_nnz parameter is given then the *_nz parameter is ignored
4079 
4080    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4081    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4082    storage requirements for this matrix.
4083 
4084    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4085    processor then it must be used on all processors that share the object for
4086    that argument.
4087 
4088    The user MUST specify either the local or global matrix dimensions
4089    (possibly both).
4090 
4091    The parallel matrix is partitioned across processors such that the
4092    first m0 rows belong to process 0, the next m1 rows belong to
4093    process 1, the next m2 rows belong to process 2 etc.. where
4094    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4095    values corresponding to [m x N] submatrix.
4096 
4097    The columns are logically partitioned with the n0 columns belonging
4098    to 0th partition, the next n1 columns belonging to the next
4099    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4100 
4101    The DIAGONAL portion of the local submatrix on any given processor
4102    is the submatrix corresponding to the rows and columns m,n
4103    corresponding to the given processor. i.e diagonal matrix on
4104    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4105    etc. The remaining portion of the local submatrix [m x (N-n)]
4106    constitute the OFF-DIAGONAL portion. The example below better
4107    illustrates this concept.
4108 
4109    For a square global matrix we define each processor's diagonal portion
4110    to be its local rows and the corresponding columns (a square submatrix);
4111    each processor's off-diagonal portion encompasses the remainder of the
4112    local matrix (a rectangular submatrix).
4113 
4114    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4115 
4116    When calling this routine with a single process communicator, a matrix of
4117    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4118    type of communicator, use the construction mechanism:
4119      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4120 
4121    By default, this format uses inodes (identical nodes) when possible.
4122    We search for consecutive rows with the same nonzero structure, thereby
4123    reusing matrix information to achieve increased efficiency.
4124 
4125    Options Database Keys:
4126 +  -mat_no_inode  - Do not use inodes
4127 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4128 -  -mat_aij_oneindex - Internally use indexing starting at 1
4129         rather than 0.  Note that when calling MatSetValues(),
4130         the user still MUST index entries starting at 0!
4131 
4132 
4133    Example usage:
4134 
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4138    as follows:
4139 
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152 
4153    This can be represented as a collection of submatrices as:
4154 
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160 
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163 
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167 
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4172    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4173    matrix, and [DF] as another SeqAIJ matrix.
4174 
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4178    One way to choose d_nz and o_nz is to use the max nonzeros per local
4179    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : dnz = 2, o_nz = 2
4183      proc1 : dnz = 3, o_nz = 2
4184      proc2 : dnz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4189    34 values.
4190 
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is sum of all the above values i.e 34, and
4200    hence pre-allocation is perfect.
4201 
4202    Level: intermediate
4203 
4204 .keywords: matrix, aij, compressed row, sparse, parallel
4205 
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4207           MPIAIJ, MatCreateMPIAIJWithArrays()
4208 @*/
4209 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4210 {
4211   PetscErrorCode ierr;
4212   PetscMPIInt    size;
4213 
4214   PetscFunctionBegin;
4215   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4216   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4218   if (size > 1) {
4219     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4220     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4221   } else {
4222     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4223     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4224   }
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 #undef __FUNCT__
4229 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4230 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4231 {
4232   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4233 
4234   PetscFunctionBegin;
4235   if (Ad)     *Ad     = a->A;
4236   if (Ao)     *Ao     = a->B;
4237   if (colmap) *colmap = a->garray;
4238   PetscFunctionReturn(0);
4239 }
4240 
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4243 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4244 {
4245   PetscErrorCode ierr;
4246   PetscInt       i;
4247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4248 
4249   PetscFunctionBegin;
4250   if (coloring->ctype == IS_COLORING_GLOBAL) {
4251     ISColoringValue *allcolors,*colors;
4252     ISColoring      ocoloring;
4253 
4254     /* set coloring for diagonal portion */
4255     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4256 
4257     /* set coloring for off-diagonal portion */
4258     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4259     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4260     for (i=0; i<a->B->cmap->n; i++) {
4261       colors[i] = allcolors[a->garray[i]];
4262     }
4263     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4264     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4265     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4266     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4267   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4268     ISColoringValue *colors;
4269     PetscInt        *larray;
4270     ISColoring      ocoloring;
4271 
4272     /* set coloring for diagonal portion */
4273     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4274     for (i=0; i<a->A->cmap->n; i++) {
4275       larray[i] = i + A->cmap->rstart;
4276     }
4277     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4278     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->A->cmap->n; i++) {
4280       colors[i] = coloring->colors[larray[i]];
4281     }
4282     ierr = PetscFree(larray);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286 
4287     /* set coloring for off-diagonal portion */
4288     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4289     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4290     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4291     for (i=0; i<a->B->cmap->n; i++) {
4292       colors[i] = coloring->colors[larray[i]];
4293     }
4294     ierr = PetscFree(larray);CHKERRQ(ierr);
4295     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4296     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4297     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4298   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 #undef __FUNCT__
4303 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4304 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4305 {
4306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4307   PetscErrorCode ierr;
4308 
4309   PetscFunctionBegin;
4310   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4311   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4317 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4318 {
4319   PetscErrorCode ierr;
4320   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4321   PetscInt       *indx;
4322 
4323   PetscFunctionBegin;
4324   /* This routine will ONLY return MPIAIJ type matrix */
4325   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4326   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4327   if (n == PETSC_DECIDE) {
4328     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4329   }
4330   /* Check sum(n) = N */
4331   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4332   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4333 
4334   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4335   rstart -= m;
4336 
4337   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4338   for (i=0; i<m; i++) {
4339     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4340     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4341     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4342   }
4343 
4344   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4345   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4346   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4347   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4348   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4349   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4350   PetscFunctionReturn(0);
4351 }
4352 
4353 #undef __FUNCT__
4354 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4355 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4356 {
4357   PetscErrorCode ierr;
4358   PetscInt       m,N,i,rstart,nnz,Ii;
4359   PetscInt       *indx;
4360   PetscScalar    *values;
4361 
4362   PetscFunctionBegin;
4363   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4364   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4365   for (i=0; i<m; i++) {
4366     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4367     Ii   = i + rstart;
4368     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4369     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4370   }
4371   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4372   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4373   PetscFunctionReturn(0);
4374 }
4375 
4376 #undef __FUNCT__
4377 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4378 /*@
4379       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4380                  matrices from each processor
4381 
4382     Collective on MPI_Comm
4383 
4384    Input Parameters:
4385 +    comm - the communicators the parallel matrix will live on
4386 .    inmat - the input sequential matrices
4387 .    n - number of local columns (or PETSC_DECIDE)
4388 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4389 
4390    Output Parameter:
4391 .    outmat - the parallel matrix generated
4392 
4393     Level: advanced
4394 
4395    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4396 
4397 @*/
4398 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4399 {
4400   PetscErrorCode ierr;
4401   PetscMPIInt    size;
4402 
4403   PetscFunctionBegin;
4404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4405   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4406   if (size == 1) {
4407     if (scall == MAT_INITIAL_MATRIX) {
4408       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4409     } else {
4410       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4411     }
4412   } else {
4413     if (scall == MAT_INITIAL_MATRIX) {
4414       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4415     }
4416     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4417   }
4418   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 #undef __FUNCT__
4423 #define __FUNCT__ "MatFileSplit"
4424 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4425 {
4426   PetscErrorCode    ierr;
4427   PetscMPIInt       rank;
4428   PetscInt          m,N,i,rstart,nnz;
4429   size_t            len;
4430   const PetscInt    *indx;
4431   PetscViewer       out;
4432   char              *name;
4433   Mat               B;
4434   const PetscScalar *values;
4435 
4436   PetscFunctionBegin;
4437   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4438   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4439   /* Should this be the type of the diagonal block of A? */
4440   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4441   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4442   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4443   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4444   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4445   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4446   for (i=0; i<m; i++) {
4447     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4448     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4449     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4450   }
4451   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453 
4454   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4455   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4456   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4457   sprintf(name,"%s.%d",outfile,rank);
4458   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4459   ierr = PetscFree(name);CHKERRQ(ierr);
4460   ierr = MatView(B,out);CHKERRQ(ierr);
4461   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4462   ierr = MatDestroy(&B);CHKERRQ(ierr);
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4467 #undef __FUNCT__
4468 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4469 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4470 {
4471   PetscErrorCode      ierr;
4472   Mat_Merge_SeqsToMPI *merge;
4473   PetscContainer      container;
4474 
4475   PetscFunctionBegin;
4476   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4477   if (container) {
4478     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4491     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4492     ierr = PetscFree(merge);CHKERRQ(ierr);
4493     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4494   }
4495   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 #include <../src/mat/utils/freespace.h>
4500 #include <petscbt.h>
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4504 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4505 {
4506   PetscErrorCode      ierr;
4507   MPI_Comm            comm;
4508   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4509   PetscMPIInt         size,rank,taga,*len_s;
4510   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4511   PetscInt            proc,m;
4512   PetscInt            **buf_ri,**buf_rj;
4513   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4514   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4515   MPI_Request         *s_waits,*r_waits;
4516   MPI_Status          *status;
4517   MatScalar           *aa=a->a;
4518   MatScalar           **abuf_r,*ba_i;
4519   Mat_Merge_SeqsToMPI *merge;
4520   PetscContainer      container;
4521 
4522   PetscFunctionBegin;
4523   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4524   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4525 
4526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4528 
4529   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4530   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4531 
4532   bi     = merge->bi;
4533   bj     = merge->bj;
4534   buf_ri = merge->buf_ri;
4535   buf_rj = merge->buf_rj;
4536 
4537   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4538   owners = merge->rowmap->range;
4539   len_s  = merge->len_s;
4540 
4541   /* send and recv matrix values */
4542   /*-----------------------------*/
4543   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4544   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4545 
4546   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4547   for (proc=0,k=0; proc<size; proc++) {
4548     if (!len_s[proc]) continue;
4549     i    = owners[proc];
4550     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4551     k++;
4552   }
4553 
4554   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4555   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4556   ierr = PetscFree(status);CHKERRQ(ierr);
4557 
4558   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4559   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4560 
4561   /* insert mat values of mpimat */
4562   /*----------------------------*/
4563   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4564   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4565 
4566   for (k=0; k<merge->nrecv; k++) {
4567     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4568     nrows       = *(buf_ri_k[k]);
4569     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4570     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4571   }
4572 
4573   /* set values of ba */
4574   m = merge->rowmap->n;
4575   for (i=0; i<m; i++) {
4576     arow = owners[rank] + i;
4577     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4578     bnzi = bi[i+1] - bi[i];
4579     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4580 
4581     /* add local non-zero vals of this proc's seqmat into ba */
4582     anzi   = ai[arow+1] - ai[arow];
4583     aj     = a->j + ai[arow];
4584     aa     = a->a + ai[arow];
4585     nextaj = 0;
4586     for (j=0; nextaj<anzi; j++) {
4587       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4588         ba_i[j] += aa[nextaj++];
4589       }
4590     }
4591 
4592     /* add received vals into ba */
4593     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4594       /* i-th row */
4595       if (i == *nextrow[k]) {
4596         anzi   = *(nextai[k]+1) - *nextai[k];
4597         aj     = buf_rj[k] + *(nextai[k]);
4598         aa     = abuf_r[k] + *(nextai[k]);
4599         nextaj = 0;
4600         for (j=0; nextaj<anzi; j++) {
4601           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4602             ba_i[j] += aa[nextaj++];
4603           }
4604         }
4605         nextrow[k]++; nextai[k]++;
4606       }
4607     }
4608     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4609   }
4610   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612 
4613   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4615   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4616   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4617   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4618   PetscFunctionReturn(0);
4619 }
4620 
4621 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4622 
4623 #undef __FUNCT__
4624 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4625 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4626 {
4627   PetscErrorCode      ierr;
4628   Mat                 B_mpi;
4629   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4630   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4631   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4632   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4633   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4634   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4635   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4636   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4637   MPI_Status          *status;
4638   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4639   PetscBT             lnkbt;
4640   Mat_Merge_SeqsToMPI *merge;
4641   PetscContainer      container;
4642 
4643   PetscFunctionBegin;
4644   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4645 
4646   /* make sure it is a PETSc comm */
4647   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4650 
4651   ierr = PetscNew(&merge);CHKERRQ(ierr);
4652   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4653 
4654   /* determine row ownership */
4655   /*---------------------------------------------------------*/
4656   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4657   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4661   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4662   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4663 
4664   m      = merge->rowmap->n;
4665   owners = merge->rowmap->range;
4666 
4667   /* determine the number of messages to send, their lengths */
4668   /*---------------------------------------------------------*/
4669   len_s = merge->len_s;
4670 
4671   len          = 0; /* length of buf_si[] */
4672   merge->nsend = 0;
4673   for (proc=0; proc<size; proc++) {
4674     len_si[proc] = 0;
4675     if (proc == rank) {
4676       len_s[proc] = 0;
4677     } else {
4678       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4679       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4680     }
4681     if (len_s[proc]) {
4682       merge->nsend++;
4683       nrows = 0;
4684       for (i=owners[proc]; i<owners[proc+1]; i++) {
4685         if (ai[i+1] > ai[i]) nrows++;
4686       }
4687       len_si[proc] = 2*(nrows+1);
4688       len         += len_si[proc];
4689     }
4690   }
4691 
4692   /* determine the number and length of messages to receive for ij-structure */
4693   /*-------------------------------------------------------------------------*/
4694   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4695   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4696 
4697   /* post the Irecv of j-structure */
4698   /*-------------------------------*/
4699   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4700   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4701 
4702   /* post the Isend of j-structure */
4703   /*--------------------------------*/
4704   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4705 
4706   for (proc=0, k=0; proc<size; proc++) {
4707     if (!len_s[proc]) continue;
4708     i    = owners[proc];
4709     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4710     k++;
4711   }
4712 
4713   /* receives and sends of j-structure are complete */
4714   /*------------------------------------------------*/
4715   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4716   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4717 
4718   /* send and recv i-structure */
4719   /*---------------------------*/
4720   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4721   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4722 
4723   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4724   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4725   for (proc=0,k=0; proc<size; proc++) {
4726     if (!len_s[proc]) continue;
4727     /* form outgoing message for i-structure:
4728          buf_si[0]:                 nrows to be sent
4729                [1:nrows]:           row index (global)
4730                [nrows+1:2*nrows+1]: i-structure index
4731     */
4732     /*-------------------------------------------*/
4733     nrows       = len_si[proc]/2 - 1;
4734     buf_si_i    = buf_si + nrows+1;
4735     buf_si[0]   = nrows;
4736     buf_si_i[0] = 0;
4737     nrows       = 0;
4738     for (i=owners[proc]; i<owners[proc+1]; i++) {
4739       anzi = ai[i+1] - ai[i];
4740       if (anzi) {
4741         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4742         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4743         nrows++;
4744       }
4745     }
4746     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4747     k++;
4748     buf_si += len_si[proc];
4749   }
4750 
4751   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4752   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4753 
4754   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4755   for (i=0; i<merge->nrecv; i++) {
4756     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4757   }
4758 
4759   ierr = PetscFree(len_si);CHKERRQ(ierr);
4760   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4761   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4762   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4763   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4764   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4765   ierr = PetscFree(status);CHKERRQ(ierr);
4766 
4767   /* compute a local seq matrix in each processor */
4768   /*----------------------------------------------*/
4769   /* allocate bi array and free space for accumulating nonzero column info */
4770   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4771   bi[0] = 0;
4772 
4773   /* create and initialize a linked list */
4774   nlnk = N+1;
4775   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4776 
4777   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4778   len  = ai[owners[rank+1]] - ai[owners[rank]];
4779   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4780 
4781   current_space = free_space;
4782 
4783   /* determine symbolic info for each local row */
4784   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4785 
4786   for (k=0; k<merge->nrecv; k++) {
4787     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4788     nrows       = *buf_ri_k[k];
4789     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4790     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4791   }
4792 
4793   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4794   len  = 0;
4795   for (i=0; i<m; i++) {
4796     bnzi = 0;
4797     /* add local non-zero cols of this proc's seqmat into lnk */
4798     arow  = owners[rank] + i;
4799     anzi  = ai[arow+1] - ai[arow];
4800     aj    = a->j + ai[arow];
4801     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4802     bnzi += nlnk;
4803     /* add received col data into lnk */
4804     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4805       if (i == *nextrow[k]) { /* i-th row */
4806         anzi  = *(nextai[k]+1) - *nextai[k];
4807         aj    = buf_rj[k] + *nextai[k];
4808         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4809         bnzi += nlnk;
4810         nextrow[k]++; nextai[k]++;
4811       }
4812     }
4813     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4814 
4815     /* if free space is not available, make more free space */
4816     if (current_space->local_remaining<bnzi) {
4817       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4818       nspacedouble++;
4819     }
4820     /* copy data into free space, then initialize lnk */
4821     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4822     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4823 
4824     current_space->array           += bnzi;
4825     current_space->local_used      += bnzi;
4826     current_space->local_remaining -= bnzi;
4827 
4828     bi[i+1] = bi[i] + bnzi;
4829   }
4830 
4831   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4832 
4833   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4834   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4835   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4836 
4837   /* create symbolic parallel matrix B_mpi */
4838   /*---------------------------------------*/
4839   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4840   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4841   if (n==PETSC_DECIDE) {
4842     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4843   } else {
4844     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4845   }
4846   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4847   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4848   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4849   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4850   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4851 
4852   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4853   B_mpi->assembled    = PETSC_FALSE;
4854   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4855   merge->bi           = bi;
4856   merge->bj           = bj;
4857   merge->buf_ri       = buf_ri;
4858   merge->buf_rj       = buf_rj;
4859   merge->coi          = NULL;
4860   merge->coj          = NULL;
4861   merge->owners_co    = NULL;
4862 
4863   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4864 
4865   /* attach the supporting struct to B_mpi for reuse */
4866   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4867   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4868   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4869   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4870   *mpimat = B_mpi;
4871 
4872   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4878 /*@C
4879       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4880                  matrices from each processor
4881 
4882     Collective on MPI_Comm
4883 
4884    Input Parameters:
4885 +    comm - the communicators the parallel matrix will live on
4886 .    seqmat - the input sequential matrices
4887 .    m - number of local rows (or PETSC_DECIDE)
4888 .    n - number of local columns (or PETSC_DECIDE)
4889 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4890 
4891    Output Parameter:
4892 .    mpimat - the parallel matrix generated
4893 
4894     Level: advanced
4895 
4896    Notes:
4897      The dimensions of the sequential matrix in each processor MUST be the same.
4898      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4899      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4900 @*/
4901 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4902 {
4903   PetscErrorCode ierr;
4904   PetscMPIInt    size;
4905 
4906   PetscFunctionBegin;
4907   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4908   if (size == 1) {
4909     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910     if (scall == MAT_INITIAL_MATRIX) {
4911       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4912     } else {
4913       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4914     }
4915     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916     PetscFunctionReturn(0);
4917   }
4918   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919   if (scall == MAT_INITIAL_MATRIX) {
4920     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4921   }
4922   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4923   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 #undef __FUNCT__
4928 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4929 /*@
4930      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4931           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4932           with MatGetSize()
4933 
4934     Not Collective
4935 
4936    Input Parameters:
4937 +    A - the matrix
4938 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 
4940    Output Parameter:
4941 .    A_loc - the local sequential matrix generated
4942 
4943     Level: developer
4944 
4945 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4946 
4947 @*/
4948 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4949 {
4950   PetscErrorCode ierr;
4951   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4952   Mat_SeqAIJ     *mat,*a,*b;
4953   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4954   MatScalar      *aa,*ba,*cam;
4955   PetscScalar    *ca;
4956   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4957   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4958   PetscBool      match;
4959   MPI_Comm       comm;
4960   PetscMPIInt    size;
4961 
4962   PetscFunctionBegin;
4963   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4964   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4965   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4966   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4967   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4968 
4969   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4970   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4971   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4972   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4973   aa = a->a; ba = b->a;
4974   if (scall == MAT_INITIAL_MATRIX) {
4975     if (size == 1) {
4976       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4977       PetscFunctionReturn(0);
4978     }
4979 
4980     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4981     ci[0] = 0;
4982     for (i=0; i<am; i++) {
4983       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4984     }
4985     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4986     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4987     k    = 0;
4988     for (i=0; i<am; i++) {
4989       ncols_o = bi[i+1] - bi[i];
4990       ncols_d = ai[i+1] - ai[i];
4991       /* off-diagonal portion of A */
4992       for (jo=0; jo<ncols_o; jo++) {
4993         col = cmap[*bj];
4994         if (col >= cstart) break;
4995         cj[k]   = col; bj++;
4996         ca[k++] = *ba++;
4997       }
4998       /* diagonal portion of A */
4999       for (j=0; j<ncols_d; j++) {
5000         cj[k]   = cstart + *aj++;
5001         ca[k++] = *aa++;
5002       }
5003       /* off-diagonal portion of A */
5004       for (j=jo; j<ncols_o; j++) {
5005         cj[k]   = cmap[*bj++];
5006         ca[k++] = *ba++;
5007       }
5008     }
5009     /* put together the new matrix */
5010     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5011     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5012     /* Since these are PETSc arrays, change flags to free them as necessary. */
5013     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5014     mat->free_a  = PETSC_TRUE;
5015     mat->free_ij = PETSC_TRUE;
5016     mat->nonew   = 0;
5017   } else if (scall == MAT_REUSE_MATRIX) {
5018     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5019     ci = mat->i; cj = mat->j; cam = mat->a;
5020     for (i=0; i<am; i++) {
5021       /* off-diagonal portion of A */
5022       ncols_o = bi[i+1] - bi[i];
5023       for (jo=0; jo<ncols_o; jo++) {
5024         col = cmap[*bj];
5025         if (col >= cstart) break;
5026         *cam++ = *ba++; bj++;
5027       }
5028       /* diagonal portion of A */
5029       ncols_d = ai[i+1] - ai[i];
5030       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5031       /* off-diagonal portion of A */
5032       for (j=jo; j<ncols_o; j++) {
5033         *cam++ = *ba++; bj++;
5034       }
5035     }
5036   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5037   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 #undef __FUNCT__
5042 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5043 /*@C
5044      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5045 
5046     Not Collective
5047 
5048    Input Parameters:
5049 +    A - the matrix
5050 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5051 -    row, col - index sets of rows and columns to extract (or NULL)
5052 
5053    Output Parameter:
5054 .    A_loc - the local sequential matrix generated
5055 
5056     Level: developer
5057 
5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5059 
5060 @*/
5061 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5062 {
5063   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5064   PetscErrorCode ierr;
5065   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5066   IS             isrowa,iscola;
5067   Mat            *aloc;
5068   PetscBool      match;
5069 
5070   PetscFunctionBegin;
5071   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5072   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5073   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5074   if (!row) {
5075     start = A->rmap->rstart; end = A->rmap->rend;
5076     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5077   } else {
5078     isrowa = *row;
5079   }
5080   if (!col) {
5081     start = A->cmap->rstart;
5082     cmap  = a->garray;
5083     nzA   = a->A->cmap->n;
5084     nzB   = a->B->cmap->n;
5085     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5086     ncols = 0;
5087     for (i=0; i<nzB; i++) {
5088       if (cmap[i] < start) idx[ncols++] = cmap[i];
5089       else break;
5090     }
5091     imark = i;
5092     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5093     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5094     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5095   } else {
5096     iscola = *col;
5097   }
5098   if (scall != MAT_INITIAL_MATRIX) {
5099     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5100     aloc[0] = *A_loc;
5101   }
5102   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5103   *A_loc = aloc[0];
5104   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5105   if (!row) {
5106     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5107   }
5108   if (!col) {
5109     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5110   }
5111   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatGetBrowsOfAcols"
5117 /*@C
5118     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5119 
5120     Collective on Mat
5121 
5122    Input Parameters:
5123 +    A,B - the matrices in mpiaij format
5124 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5126 
5127    Output Parameter:
5128 +    rowb, colb - index sets of rows and columns of B to extract
5129 -    B_seq - the sequential matrix generated
5130 
5131     Level: developer
5132 
5133 @*/
5134 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5135 {
5136   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5137   PetscErrorCode ierr;
5138   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5139   IS             isrowb,iscolb;
5140   Mat            *bseq=NULL;
5141 
5142   PetscFunctionBegin;
5143   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5144     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5145   }
5146   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5147 
5148   if (scall == MAT_INITIAL_MATRIX) {
5149     start = A->cmap->rstart;
5150     cmap  = a->garray;
5151     nzA   = a->A->cmap->n;
5152     nzB   = a->B->cmap->n;
5153     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5154     ncols = 0;
5155     for (i=0; i<nzB; i++) {  /* row < local row index */
5156       if (cmap[i] < start) idx[ncols++] = cmap[i];
5157       else break;
5158     }
5159     imark = i;
5160     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5161     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5162     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5163     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5164   } else {
5165     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5166     isrowb  = *rowb; iscolb = *colb;
5167     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5168     bseq[0] = *B_seq;
5169   }
5170   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5171   *B_seq = bseq[0];
5172   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5173   if (!rowb) {
5174     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5175   } else {
5176     *rowb = isrowb;
5177   }
5178   if (!colb) {
5179     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5180   } else {
5181     *colb = iscolb;
5182   }
5183   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5184   PetscFunctionReturn(0);
5185 }
5186 
5187 #undef __FUNCT__
5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5189 /*
5190     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5191     of the OFF-DIAGONAL portion of local A
5192 
5193     Collective on Mat
5194 
5195    Input Parameters:
5196 +    A,B - the matrices in mpiaij format
5197 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5198 
5199    Output Parameter:
5200 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5201 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5202 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5203 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5204 
5205     Level: developer
5206 
5207 */
5208 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5209 {
5210   VecScatter_MPI_General *gen_to,*gen_from;
5211   PetscErrorCode         ierr;
5212   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5213   Mat_SeqAIJ             *b_oth;
5214   VecScatter             ctx =a->Mvctx;
5215   MPI_Comm               comm;
5216   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5217   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5218   PetscScalar            *rvalues,*svalues;
5219   MatScalar              *b_otha,*bufa,*bufA;
5220   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5221   MPI_Request            *rwaits = NULL,*swaits = NULL;
5222   MPI_Status             *sstatus,rstatus;
5223   PetscMPIInt            jj,size;
5224   PetscInt               *cols,sbs,rbs;
5225   PetscScalar            *vals;
5226 
5227   PetscFunctionBegin;
5228   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5229   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5230   if (size == 1) PetscFunctionReturn(0);
5231 
5232   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5233     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5234   }
5235   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5236   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5237 
5238   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5239   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5240   rvalues  = gen_from->values; /* holds the length of receiving row */
5241   svalues  = gen_to->values;   /* holds the length of sending row */
5242   nrecvs   = gen_from->n;
5243   nsends   = gen_to->n;
5244 
5245   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5246   srow    = gen_to->indices;    /* local row index to be sent */
5247   sstarts = gen_to->starts;
5248   sprocs  = gen_to->procs;
5249   sstatus = gen_to->sstatus;
5250   sbs     = gen_to->bs;
5251   rstarts = gen_from->starts;
5252   rprocs  = gen_from->procs;
5253   rbs     = gen_from->bs;
5254 
5255   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5256   if (scall == MAT_INITIAL_MATRIX) {
5257     /* i-array */
5258     /*---------*/
5259     /*  post receives */
5260     for (i=0; i<nrecvs; i++) {
5261       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5262       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5263       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5264     }
5265 
5266     /* pack the outgoing message */
5267     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5268 
5269     sstartsj[0] = 0;
5270     rstartsj[0] = 0;
5271     len         = 0; /* total length of j or a array to be sent */
5272     k           = 0;
5273     for (i=0; i<nsends; i++) {
5274       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5275       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5276       for (j=0; j<nrows; j++) {
5277         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5278         for (l=0; l<sbs; l++) {
5279           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5280 
5281           rowlen[j*sbs+l] = ncols;
5282 
5283           len += ncols;
5284           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5285         }
5286         k++;
5287       }
5288       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5289 
5290       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5291     }
5292     /* recvs and sends of i-array are completed */
5293     i = nrecvs;
5294     while (i--) {
5295       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5296     }
5297     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5298 
5299     /* allocate buffers for sending j and a arrays */
5300     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5301     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5302 
5303     /* create i-array of B_oth */
5304     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5305 
5306     b_othi[0] = 0;
5307     len       = 0; /* total length of j or a array to be received */
5308     k         = 0;
5309     for (i=0; i<nrecvs; i++) {
5310       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5311       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5312       for (j=0; j<nrows; j++) {
5313         b_othi[k+1] = b_othi[k] + rowlen[j];
5314         len        += rowlen[j]; k++;
5315       }
5316       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5317     }
5318 
5319     /* allocate space for j and a arrrays of B_oth */
5320     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5321     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5322 
5323     /* j-array */
5324     /*---------*/
5325     /*  post receives of j-array */
5326     for (i=0; i<nrecvs; i++) {
5327       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* pack the outgoing message j-array */
5332     k = 0;
5333     for (i=0; i<nsends; i++) {
5334       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335       bufJ  = bufj+sstartsj[i];
5336       for (j=0; j<nrows; j++) {
5337         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5338         for (ll=0; ll<sbs; ll++) {
5339           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5340           for (l=0; l<ncols; l++) {
5341             *bufJ++ = cols[l];
5342           }
5343           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5344         }
5345       }
5346       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347     }
5348 
5349     /* recvs and sends of j-array are completed */
5350     i = nrecvs;
5351     while (i--) {
5352       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353     }
5354     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355   } else if (scall == MAT_REUSE_MATRIX) {
5356     sstartsj = *startsj_s;
5357     rstartsj = *startsj_r;
5358     bufa     = *bufa_ptr;
5359     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5360     b_otha   = b_oth->a;
5361   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5362 
5363   /* a-array */
5364   /*---------*/
5365   /*  post receives of a-array */
5366   for (i=0; i<nrecvs; i++) {
5367     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5368     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5369   }
5370 
5371   /* pack the outgoing message a-array */
5372   k = 0;
5373   for (i=0; i<nsends; i++) {
5374     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5375     bufA  = bufa+sstartsj[i];
5376     for (j=0; j<nrows; j++) {
5377       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5378       for (ll=0; ll<sbs; ll++) {
5379         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5380         for (l=0; l<ncols; l++) {
5381           *bufA++ = vals[l];
5382         }
5383         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5384       }
5385     }
5386     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5387   }
5388   /* recvs and sends of a-array are completed */
5389   i = nrecvs;
5390   while (i--) {
5391     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5392   }
5393   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5394   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5395 
5396   if (scall == MAT_INITIAL_MATRIX) {
5397     /* put together the new matrix */
5398     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5399 
5400     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5401     /* Since these are PETSc arrays, change flags to free them as necessary. */
5402     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5403     b_oth->free_a  = PETSC_TRUE;
5404     b_oth->free_ij = PETSC_TRUE;
5405     b_oth->nonew   = 0;
5406 
5407     ierr = PetscFree(bufj);CHKERRQ(ierr);
5408     if (!startsj_s || !bufa_ptr) {
5409       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5410       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5411     } else {
5412       *startsj_s = sstartsj;
5413       *startsj_r = rstartsj;
5414       *bufa_ptr  = bufa;
5415     }
5416   }
5417   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 #undef __FUNCT__
5422 #define __FUNCT__ "MatGetCommunicationStructs"
5423 /*@C
5424   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5425 
5426   Not Collective
5427 
5428   Input Parameters:
5429 . A - The matrix in mpiaij format
5430 
5431   Output Parameter:
5432 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5433 . colmap - A map from global column index to local index into lvec
5434 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5435 
5436   Level: developer
5437 
5438 @*/
5439 #if defined(PETSC_USE_CTABLE)
5440 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5441 #else
5442 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5443 #endif
5444 {
5445   Mat_MPIAIJ *a;
5446 
5447   PetscFunctionBegin;
5448   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5449   PetscValidPointer(lvec, 2);
5450   PetscValidPointer(colmap, 3);
5451   PetscValidPointer(multScatter, 4);
5452   a = (Mat_MPIAIJ*) A->data;
5453   if (lvec) *lvec = a->lvec;
5454   if (colmap) *colmap = a->colmap;
5455   if (multScatter) *multScatter = a->Mvctx;
5456   PetscFunctionReturn(0);
5457 }
5458 
5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5462 
5463 #undef __FUNCT__
5464 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5465 /*
5466     Computes (B'*A')' since computing B*A directly is untenable
5467 
5468                n                       p                          p
5469         (              )       (              )         (                  )
5470       m (      A       )  *  n (       B      )   =   m (         C        )
5471         (              )       (              )         (                  )
5472 
5473 */
5474 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5475 {
5476   PetscErrorCode ierr;
5477   Mat            At,Bt,Ct;
5478 
5479   PetscFunctionBegin;
5480   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5481   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5482   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5483   ierr = MatDestroy(&At);CHKERRQ(ierr);
5484   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5485   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5486   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5492 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5493 {
5494   PetscErrorCode ierr;
5495   PetscInt       m=A->rmap->n,n=B->cmap->n;
5496   Mat            Cmat;
5497 
5498   PetscFunctionBegin;
5499   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5500   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5501   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5502   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5503   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5504   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5505   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5506   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5507 
5508   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5509 
5510   *C = Cmat;
5511   PetscFunctionReturn(0);
5512 }
5513 
5514 /* ----------------------------------------------------------------*/
5515 #undef __FUNCT__
5516 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5517 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5518 {
5519   PetscErrorCode ierr;
5520 
5521   PetscFunctionBegin;
5522   if (scall == MAT_INITIAL_MATRIX) {
5523     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5524     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5525     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5526   }
5527   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5528   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5529   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5530   PetscFunctionReturn(0);
5531 }
5532 
5533 #if defined(PETSC_HAVE_MUMPS)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5535 #endif
5536 #if defined(PETSC_HAVE_PASTIX)
5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5538 #endif
5539 #if defined(PETSC_HAVE_SUPERLU_DIST)
5540 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5541 #endif
5542 #if defined(PETSC_HAVE_CLIQUE)
5543 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5544 #endif
5545 
5546 /*MC
5547    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5548 
5549    Options Database Keys:
5550 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5551 
5552   Level: beginner
5553 
5554 .seealso: MatCreateAIJ()
5555 M*/
5556 
5557 #undef __FUNCT__
5558 #define __FUNCT__ "MatCreate_MPIAIJ"
5559 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5560 {
5561   Mat_MPIAIJ     *b;
5562   PetscErrorCode ierr;
5563   PetscMPIInt    size;
5564 
5565   PetscFunctionBegin;
5566   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5567 
5568   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5569   B->data       = (void*)b;
5570   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5571   B->assembled  = PETSC_FALSE;
5572   B->insertmode = NOT_SET_VALUES;
5573   b->size       = size;
5574 
5575   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5576 
5577   /* build cache for off array entries formed */
5578   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5579 
5580   b->donotstash  = PETSC_FALSE;
5581   b->colmap      = 0;
5582   b->garray      = 0;
5583   b->roworiented = PETSC_TRUE;
5584 
5585   /* stuff used for matrix vector multiply */
5586   b->lvec  = NULL;
5587   b->Mvctx = NULL;
5588 
5589   /* stuff for MatGetRow() */
5590   b->rowindices   = 0;
5591   b->rowvalues    = 0;
5592   b->getrowactive = PETSC_FALSE;
5593 
5594   /* flexible pointer used in CUSP/CUSPARSE classes */
5595   b->spptr = NULL;
5596 
5597 #if defined(PETSC_HAVE_MUMPS)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5599 #endif
5600 #if defined(PETSC_HAVE_PASTIX)
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_SUPERLU_DIST)
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5605 #endif
5606 #if defined(PETSC_HAVE_CLIQUE)
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5608 #endif
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5623   PetscFunctionReturn(0);
5624 }
5625 
5626 #undef __FUNCT__
5627 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5628 /*@C
5629      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5630          and "off-diagonal" part of the matrix in CSR format.
5631 
5632    Collective on MPI_Comm
5633 
5634    Input Parameters:
5635 +  comm - MPI communicator
5636 .  m - number of local rows (Cannot be PETSC_DECIDE)
5637 .  n - This value should be the same as the local size used in creating the
5638        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5639        calculated if N is given) For square matrices n is almost always m.
5640 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5641 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5642 .   i - row indices for "diagonal" portion of matrix
5643 .   j - column indices
5644 .   a - matrix values
5645 .   oi - row indices for "off-diagonal" portion of matrix
5646 .   oj - column indices
5647 -   oa - matrix values
5648 
5649    Output Parameter:
5650 .   mat - the matrix
5651 
5652    Level: advanced
5653 
5654    Notes:
5655        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5656        must free the arrays once the matrix has been destroyed and not before.
5657 
5658        The i and j indices are 0 based
5659 
5660        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5661 
5662        This sets local rows and cannot be used to set off-processor values.
5663 
5664        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5665        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5666        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5667        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5668        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5669        communication if it is known that only local entries will be set.
5670 
5671 .keywords: matrix, aij, compressed row, sparse, parallel
5672 
5673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5674           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5675 C@*/
5676 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5677 {
5678   PetscErrorCode ierr;
5679   Mat_MPIAIJ     *maij;
5680 
5681   PetscFunctionBegin;
5682   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5683   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5684   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5685   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5686   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5687   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5688   maij = (Mat_MPIAIJ*) (*mat)->data;
5689 
5690   (*mat)->preallocated = PETSC_TRUE;
5691 
5692   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5693   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5694 
5695   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5696   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5697 
5698   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702 
5703   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5706   PetscFunctionReturn(0);
5707 }
5708 
5709 /*
5710     Special version for direct calls from Fortran
5711 */
5712 #include <petsc-private/fortranimpl.h>
5713 
5714 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5715 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5716 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5717 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5718 #endif
5719 
5720 /* Change these macros so can be used in void function */
5721 #undef CHKERRQ
5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5723 #undef SETERRQ2
5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5725 #undef SETERRQ3
5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5727 #undef SETERRQ
5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5729 
5730 #undef __FUNCT__
5731 #define __FUNCT__ "matsetvaluesmpiaij_"
5732 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5733 {
5734   Mat            mat  = *mmat;
5735   PetscInt       m    = *mm, n = *mn;
5736   InsertMode     addv = *maddv;
5737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5738   PetscScalar    value;
5739   PetscErrorCode ierr;
5740 
5741   MatCheckPreallocated(mat,1);
5742   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5743 
5744 #if defined(PETSC_USE_DEBUG)
5745   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5746 #endif
5747   {
5748     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5749     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5750     PetscBool roworiented = aij->roworiented;
5751 
5752     /* Some Variables required in the macro */
5753     Mat        A                 = aij->A;
5754     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5755     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5756     MatScalar  *aa               = a->a;
5757     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5758     Mat        B                 = aij->B;
5759     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5760     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5761     MatScalar  *ba               = b->a;
5762 
5763     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5764     PetscInt  nonew = a->nonew;
5765     MatScalar *ap1,*ap2;
5766 
5767     PetscFunctionBegin;
5768     for (i=0; i<m; i++) {
5769       if (im[i] < 0) continue;
5770 #if defined(PETSC_USE_DEBUG)
5771       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5772 #endif
5773       if (im[i] >= rstart && im[i] < rend) {
5774         row      = im[i] - rstart;
5775         lastcol1 = -1;
5776         rp1      = aj + ai[row];
5777         ap1      = aa + ai[row];
5778         rmax1    = aimax[row];
5779         nrow1    = ailen[row];
5780         low1     = 0;
5781         high1    = nrow1;
5782         lastcol2 = -1;
5783         rp2      = bj + bi[row];
5784         ap2      = ba + bi[row];
5785         rmax2    = bimax[row];
5786         nrow2    = bilen[row];
5787         low2     = 0;
5788         high2    = nrow2;
5789 
5790         for (j=0; j<n; j++) {
5791           if (roworiented) value = v[i*n+j];
5792           else value = v[i+j*m];
5793           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5794           if (in[j] >= cstart && in[j] < cend) {
5795             col = in[j] - cstart;
5796             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5797           } else if (in[j] < 0) continue;
5798 #if defined(PETSC_USE_DEBUG)
5799           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5800 #endif
5801           else {
5802             if (mat->was_assembled) {
5803               if (!aij->colmap) {
5804                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5805               }
5806 #if defined(PETSC_USE_CTABLE)
5807               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5808               col--;
5809 #else
5810               col = aij->colmap[in[j]] - 1;
5811 #endif
5812               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5813                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5814                 col  =  in[j];
5815                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5816                 B     = aij->B;
5817                 b     = (Mat_SeqAIJ*)B->data;
5818                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5819                 rp2   = bj + bi[row];
5820                 ap2   = ba + bi[row];
5821                 rmax2 = bimax[row];
5822                 nrow2 = bilen[row];
5823                 low2  = 0;
5824                 high2 = nrow2;
5825                 bm    = aij->B->rmap->n;
5826                 ba    = b->a;
5827               }
5828             } else col = in[j];
5829             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5830           }
5831         }
5832       } else if (!aij->donotstash) {
5833         if (roworiented) {
5834           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5835         } else {
5836           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5837         }
5838       }
5839     }
5840   }
5841   PetscFunctionReturnVoid();
5842 }
5843 
5844