xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 461878b29a91ee2f8c912b31b68f4de725324203)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the matrix also automatically switches over to using inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
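/*
   A minimal usage sketch (m, n, and the nonzero counts below are illustrative placeholders)
   showing the pattern recommended above of calling both preallocation routines, so the same
   code works with one process (MATSEQAIJ) and with several (MATMPIAIJ):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   The preallocation call that does not match the actual type is ignored, so both can be
   made unconditionally.
*/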
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
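/*
   A minimal sketch of selecting this format at runtime: pass -mat_type aijcrl on the command
   line (assuming the application calls MatSetFromOptions() on the matrix), or set it
   explicitly in code before preallocation:

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
*/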
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
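/*
   A minimal calling sketch (variable names are illustrative): gmat is a MATSEQAIJ matrix
   whose numerical content is used only on process 0, and every process passes the number
   of rows m it is to own:

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same dmat moves over only the new numerical
   values from process 0.
*/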
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal nonzeros in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal nonzeros in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each processor
350 has an order-N integer array), but access is fast.
351 */
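/*
   A sketch of the lookup side of this map (gcol and lcol are illustrative names; the same
   pattern appears in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below).  Entries are
   stored shifted by one, so a result of -1 after the decrement means the global column gcol
   is not present in the off-diagonal part:

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/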
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are either in INSERTMODE or ADDMODE */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMult_MPIAIJ"
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941 
942   PetscFunctionBegin;
943   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
944   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMultAdd_MPIAIJ"
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added in yy until the next line */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* values actually were received in the Begin() but we need to call this nop */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1012 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: check that the diagonal blocks are transposes of each other */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078 {
1079   PetscErrorCode ierr;
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081 
1082   PetscFunctionBegin;
1083   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatScale_MPIAIJ"
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatDestroy_Redundant"
1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105 {
1106   PetscErrorCode ierr;
1107   Mat_Redundant  *redund = *redundant;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   *redundant = NULL;
1112   if (redund){
1113     if (redund->matseq) { /* via MatGetSubMatrices()  */
1114       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1115       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1116       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1117       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1118     } else {
1119       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1120       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1121       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1122       for (i=0; i<redund->nrecvs; i++) {
1123         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1124         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1125       }
1126       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1127     }
1128 
1129     if (redund->psubcomm) {
1130       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(redund);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatDestroy_MPIAIJ"
1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145 #if defined(PETSC_USE_LOG)
1146   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147 #endif
1148   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1149   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1150   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1151   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1152   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1153 #if defined(PETSC_USE_CTABLE)
1154   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1155 #else
1156   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1160   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1161   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1162   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1182   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1183   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1184   PetscErrorCode ierr;
1185   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1186   int            fd;
1187   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1188   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189   PetscScalar    *column_values;
1190   PetscInt       message_count,flowcontrolcount;
1191   FILE           *file;
1192 
1193   PetscFunctionBegin;
1194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1196   nz   = A->nz + B->nz;
1197   if (!rank) {
1198     header[0] = MAT_FILE_CLASSID;
1199     header[1] = mat->rmap->N;
1200     header[2] = mat->cmap->N;
1201 
1202     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1204     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     /* get largest number of rows any processor has */
1206     rlen  = mat->rmap->n;
1207     range = mat->rmap->range;
1208     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209   } else {
1210     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211     rlen = mat->rmap->n;
1212   }
1213 
1214   /* load up the local row counts */
1215   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1216   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1217 
1218   /* store the row lengths to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       rlen = range[i+1] - range[i];
1225       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1235 
1236   /* load up the local column indices */
1237   nzmax = nz; /* the root processor needs as much space as the largest processor needs */
1238   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1240   cnt   = 0;
1241   for (i=0; i<mat->rmap->n; i++) {
1242     for (j=B->i[i]; j<B->i[i+1]; j++) {
1243       if ((col = garray[B->j[j]]) > cstart) break;
1244       column_indices[cnt++] = col;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248   }
1249   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1250 
1251   /* store the column indices to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     MPI_Status status;
1255     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1256     for (i=1; i<size; i++) {
1257       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1258       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1259       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1264   } else {
1265     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1266     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1271 
1272   /* load up the local column values */
1273   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1274   cnt  = 0;
1275   for (i=0; i<mat->rmap->n; i++) {
1276     for (j=B->i[i]; j<B->i[i+1]; j++) {
1277       if (garray[B->j[j]] > cstart) break;
1278       column_values[cnt++] = B->a[j];
1279     }
1280     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282   }
1283   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1284 
1285   /* store the column values to the file */
1286   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1287   if (!rank) {
1288     MPI_Status status;
1289     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1290     for (i=1; i<size; i++) {
1291       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1292       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
1310 
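/*
   Usage sketch (illustrative only, not part of this file's build): writing an assembled
   MPIAIJ matrix to disk through the binary viewer path above and reading it back with
   MatLoad(). The file name "matrix.dat" and variable names are arbitrary examples.

     PetscViewer viewer;
     Mat         C;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);            /* dispatches to MatView_MPIAIJ_Binary() on more than one process */
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(C,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/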
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscBool inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto first processor. */
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386     const char *matname;
1387 
1388     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1389     if (!rank) {
1390       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1391     } else {
1392       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1393     }
1394     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1395     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1396     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1397     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1398     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1399 
1400     /* copy over the A part */
1401     Aloc = (Mat_SeqAIJ*)aij->A->data;
1402     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1403     row  = mat->rmap->rstart;
1404     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1405     for (i=0; i<m; i++) {
1406       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1407       row++;
1408       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1409     }
1410     aj = Aloc->j;
1411     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1412 
1413     /* copy over the B part */
1414     Aloc = (Mat_SeqAIJ*)aij->B->data;
1415     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1416     row  = mat->rmap->rstart;
1417     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1418     ct   = cols;
1419     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1420     for (i=0; i<m; i++) {
1421       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1422       row++;
1423       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1424     }
1425     ierr = PetscFree(ct);CHKERRQ(ierr);
1426     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     /*
1429        Everyone has to participate in drawing the matrix, since the graphics waits are
1430        synchronized across all processes that share the PetscDraw object
1431     */
1432     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1433     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1434     if (!rank) {
1435       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1436       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1437     }
1438     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1439     ierr = MatDestroy(&A);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatView_MPIAIJ"
1446 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1447 {
1448   PetscErrorCode ierr;
1449   PetscBool      iascii,isdraw,issocket,isbinary;
1450 
1451   PetscFunctionBegin;
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1456   if (iascii || isdraw || isbinary || issocket) {
1457     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1458   }
1459   PetscFunctionReturn(0);
1460 }
1461 
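/*
   Usage sketch (illustrative, not compiled here): requesting the per-process summary produced
   by the ASCII info branch above through a standard viewer format.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/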
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatSOR_MPIAIJ"
1464 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1465 {
1466   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1467   PetscErrorCode ierr;
1468   Vec            bb1 = 0;
1469   PetscBool      hasop;
1470 
1471   PetscFunctionBegin;
1472   if (flag == SOR_APPLY_UPPER) {
1473     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474     PetscFunctionReturn(0);
1475   }
1476 
1477   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1478     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1479   }
1480 
1481   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1482     if (flag & SOR_ZERO_INITIAL_GUESS) {
1483       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1484       its--;
1485     }
1486 
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503     while (its--) {
1504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506 
1507       /* update rhs: bb1 = bb - B*x */
1508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1510 
1511       /* local sweep */
1512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1513     }
1514   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1515     if (flag & SOR_ZERO_INITIAL_GUESS) {
1516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1517       its--;
1518     }
1519     while (its--) {
1520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522 
1523       /* update rhs: bb1 = bb - B*x */
1524       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1525       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1526 
1527       /* local sweep */
1528       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1529     }
1530   } else if (flag & SOR_EISENSTAT) {
1531     Vec xx1;
1532 
1533     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1534     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1535 
1536     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538     if (!mat->diag) {
1539       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1540       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1541     }
1542     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1543     if (hasop) {
1544       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1545     } else {
1546       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1547     }
1548     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1549 
1550     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1551 
1552     /* local sweep */
1553     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1554     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1555     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1556   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1557 
1558   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
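/*
   Usage sketch (illustrative): one local symmetric relaxation step through the public
   MatSOR() interface, which lands in MatSOR_MPIAIJ() for this type. The vectors are
   created here only for the example; a KSP solve with -pc_type sor exercises the same path.

     Vec b,x;
     ierr = MatGetVecs(mat,&x,&b);CHKERRQ(ierr);
     ierr = VecSet(b,1.0);CHKERRQ(ierr);
     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
     ierr = VecDestroy(&b);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/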
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatPermute_MPIAIJ"
1564 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1565 {
1566   Mat            aA,aB,Aperm;
1567   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1568   PetscScalar    *aa,*ba;
1569   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1570   PetscSF        rowsf,sf;
1571   IS             parcolp = NULL;
1572   PetscBool      done;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1577   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1578   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1579   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1580 
1581   /* Invert row permutation to find out where my rows should go */
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1585   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1586   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1587   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1588 
1589   /* Invert column permutation to find out where my columns should go */
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1593   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1594   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1597 
1598   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1599   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1600   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1601 
1602   /* Find out where my gcols should go */
1603   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1604   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1605   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1606   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1607   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1608   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1613   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1614   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1615   for (i=0; i<m; i++) {
1616     PetscInt row = rdest[i],rowner;
1617     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       PetscInt cowner,col = cdest[aj[j]];
1620       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1621       if (rowner == cowner) dnnz[i]++;
1622       else onnz[i]++;
1623     }
1624     for (j=bi[i]; j<bi[i+1]; j++) {
1625       PetscInt cowner,col = gcdest[bj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630   }
1631   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1632   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1633   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1636 
1637   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1638   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1639   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1642     PetscInt j0,rowlen;
1643     rowlen = ai[i+1] - ai[i];
1644     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so set the values in batches of at most m */
1645       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1646       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1647     }
1648     rowlen = bi[i+1] - bi[i];
1649     for (j0=j=0; j<rowlen; j0=j) {
1650       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1651       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1652     }
1653   }
1654   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1655   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1656   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1657   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1658   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1659   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1660   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1661   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1662   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1663   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1664   *B = Aperm;
1665   PetscFunctionReturn(0);
1666 }
1667 
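/*
   Usage sketch (illustrative): permuting rows and columns through the public MatPermute().
   Here both index sets happen to be identity permutations built with ISCreateStride(); any
   permutation whose entries are distributed conformally with A's row/column layout works
   the same way.

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/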
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1670 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1671 {
1672   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1673   Mat            A    = mat->A,B = mat->B;
1674   PetscErrorCode ierr;
1675   PetscReal      isend[5],irecv[5];
1676 
1677   PetscFunctionBegin;
1678   info->block_size = 1.0;
1679   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1680 
1681   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1682   isend[3] = info->memory;  isend[4] = info->mallocs;
1683 
1684   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1685 
1686   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1687   isend[3] += info->memory;  isend[4] += info->mallocs;
1688   if (flag == MAT_LOCAL) {
1689     info->nz_used      = isend[0];
1690     info->nz_allocated = isend[1];
1691     info->nz_unneeded  = isend[2];
1692     info->memory       = isend[3];
1693     info->mallocs      = isend[4];
1694   } else if (flag == MAT_GLOBAL_MAX) {
1695     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1696 
1697     info->nz_used      = irecv[0];
1698     info->nz_allocated = irecv[1];
1699     info->nz_unneeded  = irecv[2];
1700     info->memory       = irecv[3];
1701     info->mallocs      = irecv[4];
1702   } else if (flag == MAT_GLOBAL_SUM) {
1703     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1704 
1705     info->nz_used      = irecv[0];
1706     info->nz_allocated = irecv[1];
1707     info->nz_unneeded  = irecv[2];
1708     info->memory       = irecv[3];
1709     info->mallocs      = irecv[4];
1710   }
1711   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1712   info->fill_ratio_needed = 0;
1713   info->factor_mallocs    = 0;
1714   PetscFunctionReturn(0);
1715 }
1716 
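/*
   Usage sketch (illustrative): querying the nonzero and memory statistics assembled above.
   MAT_GLOBAL_SUM takes the MPI_Allreduce() branch of MatGetInfo_MPIAIJ().

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used %g, allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/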
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatSetOption_MPIAIJ"
1719 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1720 {
1721   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   switch (op) {
1726   case MAT_NEW_NONZERO_LOCATIONS:
1727   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1728   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1729   case MAT_KEEP_NONZERO_PATTERN:
1730   case MAT_NEW_NONZERO_LOCATION_ERR:
1731   case MAT_USE_INODES:
1732   case MAT_IGNORE_ZERO_ENTRIES:
1733     MatCheckPreallocated(A,1);
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_ROW_ORIENTED:
1738     a->roworiented = flg;
1739 
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_NEW_DIAGONALS:
1744     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1745     break;
1746   case MAT_IGNORE_OFF_PROC_ENTRIES:
1747     a->donotstash = flg;
1748     break;
1749   case MAT_SPD:
1750     A->spd_set = PETSC_TRUE;
1751     A->spd     = flg;
1752     if (flg) {
1753       A->symmetric                  = PETSC_TRUE;
1754       A->structurally_symmetric     = PETSC_TRUE;
1755       A->symmetric_set              = PETSC_TRUE;
1756       A->structurally_symmetric_set = PETSC_TRUE;
1757     }
1758     break;
1759   case MAT_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_STRUCTURALLY_SYMMETRIC:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_HERMITIAN:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_SYMMETRY_ETERNAL:
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   default:
1772     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
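/*
   Usage sketch (illustrative): options typically set before assembly; each call goes through
   MatSetOption_MPIAIJ() above and is forwarded to the diagonal (A) and off-diagonal (B)
   blocks where that makes sense.

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
*/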
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatGetRow_MPIAIJ"
1779 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1780 {
1781   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1782   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1783   PetscErrorCode ierr;
1784   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1785   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1786   PetscInt       *cmap,*idx_p;
1787 
1788   PetscFunctionBegin;
1789   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1790   mat->getrowactive = PETSC_TRUE;
1791 
1792   if (!mat->rowvalues && (idx || v)) {
1793     /*
1794         allocate enough space to hold information from the longest row.
1795     */
1796     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1797     PetscInt   max = 1,tmp;
1798     for (i=0; i<matin->rmap->n; i++) {
1799       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1800       if (max < tmp) max = tmp;
1801     }
1802     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1803   }
1804 
1805   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1806   lrow = row - rstart;
1807 
1808   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1809   if (!v)   {pvA = 0; pvB = 0;}
1810   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1811   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   nztot = nzA + nzB;
1814 
1815   cmap = mat->garray;
1816   if (v  || idx) {
1817     if (nztot) {
1818       /* Sort by increasing column numbers, assuming A and B already sorted */
1819       PetscInt imark = -1;
1820       if (v) {
1821         *v = v_p = mat->rowvalues;
1822         for (i=0; i<nzB; i++) {
1823           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1824           else break;
1825         }
1826         imark = i;
1827         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1828         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1829       }
1830       if (idx) {
1831         *idx = idx_p = mat->rowindices;
1832         if (imark > -1) {
1833           for (i=0; i<imark; i++) {
1834             idx_p[i] = cmap[cworkB[i]];
1835           }
1836         } else {
1837           for (i=0; i<nzB; i++) {
1838             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1839             else break;
1840           }
1841           imark = i;
1842         }
1843         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1844         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1845       }
1846     } else {
1847       if (idx) *idx = 0;
1848       if (v)   *v   = 0;
1849     }
1850   }
1851   *nz  = nztot;
1852   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1853   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1859 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1860 {
1861   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1862 
1863   PetscFunctionBegin;
1864   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1865   aij->getrowactive = PETSC_FALSE;
1866   PetscFunctionReturn(0);
1867 }
1868 
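/*
   Usage sketch (illustrative): walking the locally owned rows with the public
   MatGetRow()/MatRestoreRow() pair, which for this type calls the routines above. Only one
   row may be held at a time, hence the restore inside the loop.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use ncols, cols[], vals[] ... */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/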
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatNorm_MPIAIJ"
1871 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1872 {
1873   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1874   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1875   PetscErrorCode ierr;
1876   PetscInt       i,j,cstart = mat->cmap->rstart;
1877   PetscReal      sum = 0.0;
1878   MatScalar      *v;
1879 
1880   PetscFunctionBegin;
1881   if (aij->size == 1) {
1882     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1883   } else {
1884     if (type == NORM_FROBENIUS) {
1885       v = amat->a;
1886       for (i=0; i<amat->nz; i++) {
1887         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1888       }
1889       v = bmat->a;
1890       for (i=0; i<bmat->nz; i++) {
1891         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1892       }
1893       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1894       *norm = PetscSqrtReal(*norm);
1895     } else if (type == NORM_1) { /* max column norm */
1896       PetscReal *tmp,*tmp2;
1897       PetscInt  *jj,*garray = aij->garray;
1898       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1899       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1900       *norm = 0.0;
1901       v     = amat->a; jj = amat->j;
1902       for (j=0; j<amat->nz; j++) {
1903         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1904       }
1905       v = bmat->a; jj = bmat->j;
1906       for (j=0; j<bmat->nz; j++) {
1907         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1908       }
1909       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1910       for (j=0; j<mat->cmap->N; j++) {
1911         if (tmp2[j] > *norm) *norm = tmp2[j];
1912       }
1913       ierr = PetscFree(tmp);CHKERRQ(ierr);
1914       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1915     } else if (type == NORM_INFINITY) { /* max row norm */
1916       PetscReal ntemp = 0.0;
1917       for (j=0; j<aij->A->rmap->n; j++) {
1918         v   = amat->a + amat->i[j];
1919         sum = 0.0;
1920         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1921           sum += PetscAbsScalar(*v); v++;
1922         }
1923         v = bmat->a + bmat->i[j];
1924         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1925           sum += PetscAbsScalar(*v); v++;
1926         }
1927         if (sum > ntemp) ntemp = sum;
1928       }
1929       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1930     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
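/*
   Usage sketch (illustrative): the three norms supported by MatNorm_MPIAIJ(); NORM_2 is
   rejected above with PETSC_ERR_SUP.

     PetscReal nrmf,nrm1,nrminf;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/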
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatTranspose_MPIAIJ"
1937 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1938 {
1939   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1940   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1941   PetscErrorCode ierr;
1942   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1943   PetscInt       cstart = A->cmap->rstart,ncol;
1944   Mat            B;
1945   MatScalar      *array;
1946 
1947   PetscFunctionBegin;
1948   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1949 
1950   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1951   ai = Aloc->i; aj = Aloc->j;
1952   bi = Bloc->i; bj = Bloc->j;
1953   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1954     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1955     PetscSFNode          *oloc;
1956     PETSC_UNUSED PetscSF sf;
1957 
1958     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1959     /* compute d_nnz for preallocation */
1960     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1961     for (i=0; i<ai[ma]; i++) {
1962       d_nnz[aj[i]]++;
1963       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964     }
1965     /* compute local off-diagonal contributions */
1966     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1968     /* map those to global */
1969     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1970     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1971     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1972     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1973     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1974     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1975     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1976 
1977     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1978     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1979     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1980     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1981     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1982     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1983   } else {
1984     B    = *matout;
1985     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1986     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1987   }
1988 
1989   /* copy over the A part */
1990   array = Aloc->a;
1991   row   = A->rmap->rstart;
1992   for (i=0; i<ma; i++) {
1993     ncol = ai[i+1]-ai[i];
1994     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1995     row++;
1996     array += ncol; aj += ncol;
1997   }
1998   aj = Aloc->j;
1999   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2000 
2001   /* copy over the B part */
2002   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2003   array = Bloc->a;
2004   row   = A->rmap->rstart;
2005   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2006   cols_tmp = cols;
2007   for (i=0; i<mb; i++) {
2008     ncol = bi[i+1]-bi[i];
2009     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2010     row++;
2011     array += ncol; cols_tmp += ncol;
2012   }
2013   ierr = PetscFree(cols);CHKERRQ(ierr);
2014 
2015   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2016   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2018     *matout = B;
2019   } else {
2020     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
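/*
   Usage sketch (illustrative): out-of-place and in-place transposes through the public
   MatTranspose(); the in-place form is only allowed for square matrices, as checked above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  /* creates a new matrix */
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     /* in place, square A only */
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/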
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2027 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2028 {
2029   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2030   Mat            a    = aij->A,b = aij->B;
2031   PetscErrorCode ierr;
2032   PetscInt       s1,s2,s3;
2033 
2034   PetscFunctionBegin;
2035   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2036   if (rr) {
2037     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2038     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2039     /* Overlap communication with computation. */
2040     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2041   }
2042   if (ll) {
2043     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2044     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2045     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2046   }
2047   /* scale the diagonal block */
2048   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2049 
2050   if (rr) {
2051     /* Do a scatter end and then right scale the off-diagonal block */
2052     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2053     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
2057 
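/*
   Usage sketch (illustrative): row and column scaling diag(l)*A*diag(r) through the public
   MatDiagonalScale(). The left vector must follow the row layout and the right vector the
   column layout, which is exactly what MatGetVecs() produces.

     Vec l,r;
     ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/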
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2060 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2061 {
2062   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2063   PetscErrorCode ierr;
2064 
2065   PetscFunctionBegin;
2066   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatEqual_MPIAIJ"
2072 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2073 {
2074   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2075   Mat            a,b,c,d;
2076   PetscBool      flg;
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   a = matA->A; b = matA->B;
2081   c = matB->A; d = matB->B;
2082 
2083   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2084   if (flg) {
2085     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2086   }
2087   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
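/*
   Usage sketch (illustrative): the collective equality test; the per-process results are
   combined with MPI_LAND above, so every process receives the same answer.

     PetscBool equal;
     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
     if (!equal) {ierr = PetscPrintf(PETSC_COMM_WORLD,"matrices differ\n");CHKERRQ(ierr);}
*/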
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatCopy_MPIAIJ"
2093 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2094 {
2095   PetscErrorCode ierr;
2096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2097   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2098 
2099   PetscFunctionBegin;
2100   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2101   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2102     /* Because of the column compression in the off-process part of the matrix a->B,
2103        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2104        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2105        could be provided by first uncompressing the a->B matrices and then copying the
2106        submatrices */
2107     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2108   } else {
2109     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2110     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
2114 
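/*
   Usage sketch (illustrative): the fast branch of MatCopy_MPIAIJ() is taken when the
   destination was duplicated from the source (same nonzero pattern and same copy routine);
   otherwise the values go through MatCopy_Basic().

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/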
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatSetUp_MPIAIJ"
2117 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2118 {
2119   PetscErrorCode ierr;
2120 
2121   PetscFunctionBegin;
2122   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 /*
2127    Computes the number of nonzeros per row needed for preallocation when X and Y
2128    have different nonzero structure.
2129 */
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2132 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2133 {
2134   PetscInt       i,j,k,nzx,nzy;
2135 
2136   PetscFunctionBegin;
2137   /* Set the number of nonzeros in the new matrix */
2138   for (i=0; i<m; i++) {
2139     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2140     nzx = xi[i+1] - xi[i];
2141     nzy = yi[i+1] - yi[i];
2142     nnz[i] = 0;
2143     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2144       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2145       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2146       nnz[i]++;
2147     }
2148     for (; k<nzy; k++) nnz[i]++;
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2156 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2157 {
2158   PetscErrorCode ierr;
2159   PetscInt       m = Y->rmap->N;
2160   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2161   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2162 
2163   PetscFunctionBegin;
2164   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatAXPY_MPIAIJ"
2170 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2174   PetscBLASInt   bnz,one=1;
2175   Mat_SeqAIJ     *x,*y;
2176 
2177   PetscFunctionBegin;
2178   if (str == SAME_NONZERO_PATTERN) {
2179     PetscScalar alpha = a;
2180     x    = (Mat_SeqAIJ*)xx->A->data;
2181     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2182     y    = (Mat_SeqAIJ*)yy->A->data;
2183     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2184     x    = (Mat_SeqAIJ*)xx->B->data;
2185     y    = (Mat_SeqAIJ*)yy->B->data;
2186     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2187     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2188     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2189   } else if (str == SUBSET_NONZERO_PATTERN) {
2190     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2191   } else {
2192     Mat      B;
2193     PetscInt *nnz_d,*nnz_o;
2194     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2195     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2196     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2197     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2198     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2199     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2200     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2201     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2202     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2203     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2204     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2205     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2206     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2207     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 
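/*
   Usage sketch (illustrative): Y = Y + a*X through the public MatAXPY(). With
   SAME_NONZERO_PATTERN the BLAS axpy fast path above is used, while
   DIFFERENT_NONZERO_PATTERN forces the preallocation/rebuild branch.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,-1.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/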
2212 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatConjugate_MPIAIJ"
2216 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2217 {
2218 #if defined(PETSC_USE_COMPLEX)
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2221 
2222   PetscFunctionBegin;
2223   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2224   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2225 #else
2226   PetscFunctionBegin;
2227 #endif
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatRealPart_MPIAIJ"
2233 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2234 {
2235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2236   PetscErrorCode ierr;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2240   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2246 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2253   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #if defined(PETSC_HAVE_PBGL)
2258 
2259 #include <boost/parallel/mpi/bsp_process_group.hpp>
2260 #include <boost/graph/distributed/ilu_default_graph.hpp>
2261 #include <boost/graph/distributed/ilu_0_block.hpp>
2262 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2263 #include <boost/graph/distributed/petsc/interface.hpp>
2264 #include <boost/multi_array.hpp>
2265 #include <boost/parallel/distributed_property_map.hpp>
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2269 /*
2270   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2271 */
2272 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2273 {
2274   namespace petsc = boost::distributed::petsc;
2275 
2276   namespace graph_dist = boost::graph::distributed;
2277   using boost::graph::distributed::ilu_default::process_group_type;
2278   using boost::graph::ilu_permuted;
2279 
2280   PetscBool      row_identity, col_identity;
2281   PetscContainer c;
2282   PetscInt       m, n, M, N;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2287   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2288   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2289   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2290 
2291   process_group_type pg;
2292   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2293   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2294   lgraph_type& level_graph = *lgraph_p;
2295   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2296 
2297   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2298   ilu_permuted(level_graph);
2299 
2300   /* put together the new matrix */
2301   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2302   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2303   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2304   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2305   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2306   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2307   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2308   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309 
2310   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2311   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2312   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2313   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2319 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2320 {
2321   PetscFunctionBegin;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSolve_MPIAIJ"
2327 /*
2328   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2329 */
2330 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2331 {
2332   namespace graph_dist = boost::graph::distributed;
2333 
2334   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2335   lgraph_type    *lgraph_p;
2336   PetscContainer c;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2341   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2342   ierr = VecCopy(b, x);CHKERRQ(ierr);
2343 
2344   PetscScalar *array_x;
2345   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2346   PetscInt sx;
2347   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2348 
2349   PetscScalar *array_b;
2350   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2351   PetscInt sb;
2352   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2353 
2354   lgraph_type& level_graph = *lgraph_p;
2355   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2356 
2357   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2358   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2359   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2360 
2361   typedef boost::iterator_property_map<array_ref_type::iterator,
2362                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2363   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2364   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2365 
2366   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2367   PetscFunctionReturn(0);
2368 }
2369 #endif
2370 
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2374 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2375 {
2376   PetscMPIInt    rank,size;
2377   MPI_Comm       comm;
2378   PetscErrorCode ierr;
2379   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2380   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2381   PetscInt       *rowrange = mat->rmap->range;
2382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2383   Mat            A = aij->A,B=aij->B,C=*matredundant;
2384   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2385   PetscScalar    *sbuf_a;
2386   PetscInt       nzlocal=a->nz+b->nz;
2387   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2388   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2389   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2390   MatScalar      *aworkA,*aworkB;
2391   PetscScalar    *vals;
2392   PetscMPIInt    tag1,tag2,tag3,imdex;
2393   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2394   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2395   MPI_Status     recv_status,*send_status;
2396   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2397   PetscInt       **rbuf_j=NULL;
2398   PetscScalar    **rbuf_a=NULL;
2399   Mat_Redundant  *redund =NULL;
2400 
2401   PetscFunctionBegin;
2402   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2403   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2405   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2406   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2407 
2408   if (reuse == MAT_REUSE_MATRIX) {
2409     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2410     if (subsize == 1) {
2411       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2412       redund = c->redundant;
2413     } else {
2414       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2415       redund = c->redundant;
2416     }
2417     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2418 
2419     nsends    = redund->nsends;
2420     nrecvs    = redund->nrecvs;
2421     send_rank = redund->send_rank;
2422     recv_rank = redund->recv_rank;
2423     sbuf_nz   = redund->sbuf_nz;
2424     rbuf_nz   = redund->rbuf_nz;
2425     sbuf_j    = redund->sbuf_j;
2426     sbuf_a    = redund->sbuf_a;
2427     rbuf_j    = redund->rbuf_j;
2428     rbuf_a    = redund->rbuf_a;
2429   }
2430 
2431   if (reuse == MAT_INITIAL_MATRIX) {
2432     PetscInt    nleftover,np_subcomm;
2433 
2434     /* get the destination processors' id send_rank, nsends and nrecvs */
2435     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2436 
2437     np_subcomm = size/nsubcomm;
2438     nleftover  = size - nsubcomm*np_subcomm;
2439 
2440     /* the block of code below is specific to INTERLACED */
2441     /* ------------------------------------------------*/
2442     nsends = 0; nrecvs = 0;
2443     for (i=0; i<size; i++) {
2444       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2445         send_rank[nsends++] = i;
2446         recv_rank[nrecvs++] = i;
2447       }
2448     }
2449     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2450       i = size-nleftover-1;
2451       j = 0;
2452       while (j < nsubcomm - nleftover) {
2453         send_rank[nsends++] = i;
2454         i--; j++;
2455       }
2456     }
2457 
2458     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2459       for (i=0; i<nleftover; i++) {
2460         recv_rank[nrecvs++] = size-nleftover+i;
2461       }
2462     }
2463     /*----------------------------------------------*/
2464 
2465     /* allocate sbuf_j, sbuf_a */
2466     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2467     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2468     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2469     /*
2470     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2471     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2472      */
2473   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2474 
2475   /* copy mat's local entries into the buffers */
2476   if (reuse == MAT_INITIAL_MATRIX) {
2477     rownz_max = 0;
2478     rptr      = sbuf_j;
2479     cols      = sbuf_j + rend-rstart + 1;
2480     vals      = sbuf_a;
2481     rptr[0]   = 0;
2482     for (i=0; i<rend-rstart; i++) {
2483       row    = i + rstart;
2484       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2485       ncols  = nzA + nzB;
2486       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2487       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2488       /* load the column indices for this row into cols */
2489       lwrite = 0;
2490       for (l=0; l<nzB; l++) {
2491         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2492           vals[lwrite]   = aworkB[l];
2493           cols[lwrite++] = ctmp;
2494         }
2495       }
2496       for (l=0; l<nzA; l++) {
2497         vals[lwrite]   = aworkA[l];
2498         cols[lwrite++] = cstart + cworkA[l];
2499       }
2500       for (l=0; l<nzB; l++) {
2501         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2502           vals[lwrite]   = aworkB[l];
2503           cols[lwrite++] = ctmp;
2504         }
2505       }
2506       vals     += ncols;
2507       cols     += ncols;
2508       rptr[i+1] = rptr[i] + ncols;
2509       if (rownz_max < ncols) rownz_max = ncols;
2510     }
2511     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2512   } else { /* only copy matrix values into sbuf_a */
2513     rptr    = sbuf_j;
2514     vals    = sbuf_a;
2515     rptr[0] = 0;
2516     for (i=0; i<rend-rstart; i++) {
2517       row    = i + rstart;
2518       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2519       ncols  = nzA + nzB;
2520       cworkB = b->j + b->i[i];
2521       aworkA = a->a + a->i[i];
2522       aworkB = b->a + b->i[i];
2523       lwrite = 0;
2524       for (l=0; l<nzB; l++) {
2525         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2526       }
2527       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2528       for (l=0; l<nzB; l++) {
2529         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2530       }
2531       vals     += ncols;
2532       rptr[i+1] = rptr[i] + ncols;
2533     }
2534   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2535 
2536   /* send nzlocal to others, and recv others' nzlocal */
2537   /*--------------------------------------------------*/
2538   if (reuse == MAT_INITIAL_MATRIX) {
2539     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2540 
2541     s_waits2 = s_waits3 + nsends;
2542     s_waits1 = s_waits2 + nsends;
2543     r_waits1 = s_waits1 + nsends;
2544     r_waits2 = r_waits1 + nrecvs;
2545     r_waits3 = r_waits2 + nrecvs;
2546   } else {
2547     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2548 
2549     r_waits3 = s_waits3 + nsends;
2550   }
2551 
2552   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2553   if (reuse == MAT_INITIAL_MATRIX) {
2554     /* get new tags to keep the communication clean */
2555     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2556     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2557     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2558 
2559     /* post receives of other's nzlocal */
2560     for (i=0; i<nrecvs; i++) {
2561       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2562     }
2563     /* send nzlocal to others */
2564     for (i=0; i<nsends; i++) {
2565       sbuf_nz[i] = nzlocal;
2566       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2567     }
2568     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2569     count = nrecvs;
2570     while (count) {
2571       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2572 
2573       recv_rank[imdex] = recv_status.MPI_SOURCE;
2574       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2575       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2576 
2577       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2578 
2579       rbuf_nz[imdex] += i + 2;
2580 
2581       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2582       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2583       count--;
2584     }
2585     /* wait on sends of nzlocal */
2586     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2587     /* send mat->i,j to others, and recv from others */
2588     /*------------------------------------------------*/
2589     for (i=0; i<nsends; i++) {
2590       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2591       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2592     }
2593     /* wait on receives of mat->i,j */
2594     /*------------------------------*/
2595     count = nrecvs;
2596     while (count) {
2597       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2598       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2599       count--;
2600     }
2601     /* wait on sends of mat->i,j */
2602     /*---------------------------*/
2603     if (nsends) {
2604       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2605     }
2606   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2607 
2608   /* post receives, send and receive mat->a */
2609   /*----------------------------------------*/
2610   for (imdex=0; imdex<nrecvs; imdex++) {
2611     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2612   }
2613   for (i=0; i<nsends; i++) {
2614     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2615   }
2616   count = nrecvs;
2617   while (count) {
2618     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2619     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2620     count--;
2621   }
2622   if (nsends) {
2623     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2624   }
2625 
2626   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2627 
2628   /* create redundant matrix */
2629   /*-------------------------*/
2630   if (reuse == MAT_INITIAL_MATRIX) {
2631     const PetscInt *range;
2632     PetscInt       rstart_sub,rend_sub,mloc_sub;
2633 
2634     /* compute rownz_max for preallocation */
2635     for (imdex=0; imdex<nrecvs; imdex++) {
2636       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2637       rptr = rbuf_j[imdex];
2638       for (i=0; i<j; i++) {
2639         ncols = rptr[i+1] - rptr[i];
2640         if (rownz_max < ncols) rownz_max = ncols;
2641       }
2642     }
2643 
2644     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2645 
2646     /* get local size of redundant matrix
2647        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient! */
2648     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2649     rstart_sub = range[nsubcomm*subrank];
2650     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2651       rend_sub = range[nsubcomm*(subrank+1)];
2652     } else {
2653       rend_sub = mat->rmap->N;
2654     }
2655     mloc_sub = rend_sub - rstart_sub;
2656 
2657     if (M == N) {
2658       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2659     } else { /* non-square matrix */
2660       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2661     }
2662     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2663     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2664     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2665     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2666   } else {
2667     C = *matredundant;
2668   }
2669 
2670   /* insert local matrix entries */
2671   rptr = sbuf_j;
2672   cols = sbuf_j + rend-rstart + 1;
2673   vals = sbuf_a;
2674   for (i=0; i<rend-rstart; i++) {
2675     row   = i + rstart;
2676     ncols = rptr[i+1] - rptr[i];
2677     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2678     vals += ncols;
2679     cols += ncols;
2680   }
2681   /* insert received matrix entries */
2682   for (imdex=0; imdex<nrecvs; imdex++) {
2683     rstart = rowrange[recv_rank[imdex]];
2684     rend   = rowrange[recv_rank[imdex]+1];
2685     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2686     rptr   = rbuf_j[imdex];
2687     cols   = rbuf_j[imdex] + rend-rstart + 1;
2688     vals   = rbuf_a[imdex];
2689     for (i=0; i<rend-rstart; i++) {
2690       row   = i + rstart;
2691       ncols = rptr[i+1] - rptr[i];
2692       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2693       vals += ncols;
2694       cols += ncols;
2695     }
2696   }
2697   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2698   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699 
2700   if (reuse == MAT_INITIAL_MATRIX) {
2701     *matredundant = C;
2702 
2703     /* create a supporting struct and attach it to C for reuse */
2704     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2705     if (subsize == 1) {
2706       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2707       c->redundant = redund;
2708     } else {
2709       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2710       c->redundant = redund;
2711     }
2712 
2713     redund->nzlocal   = nzlocal;
2714     redund->nsends    = nsends;
2715     redund->nrecvs    = nrecvs;
2716     redund->send_rank = send_rank;
2717     redund->recv_rank = recv_rank;
2718     redund->sbuf_nz   = sbuf_nz;
2719     redund->rbuf_nz   = rbuf_nz;
2720     redund->sbuf_j    = sbuf_j;
2721     redund->sbuf_a    = sbuf_a;
2722     redund->rbuf_j    = rbuf_j;
2723     redund->rbuf_a    = rbuf_a;
2724     redund->psubcomm  = NULL;
2725   }
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2731 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2732 {
2733   PetscErrorCode ierr;
2734   MPI_Comm       comm;
2735   PetscMPIInt    size,subsize;
2736   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2737   Mat_Redundant  *redund=NULL;
2738   PetscSubcomm   psubcomm=NULL;
2739   MPI_Comm       subcomm_in=subcomm;
2740   Mat            *matseq;
2741   IS             isrow,iscol;
2742 
2743   PetscFunctionBegin;
2744   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2745     if (reuse ==  MAT_INITIAL_MATRIX) {
2746       /* create psubcomm, then get subcomm */
2747       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2748       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2749       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2750 
2751       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2752       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2753       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2754       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2755       subcomm = psubcomm->comm;
2756     } else { /* retrieve psubcomm and subcomm */
2757       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2758       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2759       if (subsize == 1) {
2760         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2761         redund = c->redundant;
2762       } else {
2763         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2764         redund = c->redundant;
2765       }
2766       psubcomm = redund->psubcomm;
2767     }
2768     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2769       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2770       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2771         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2772         if (subsize == 1) {
2773           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2774           c->redundant->psubcomm = psubcomm;
2775         } else {
2776           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2777           c->redundant->psubcomm = psubcomm;
2778         }
2779       }
2780       PetscFunctionReturn(0);
2781     }
2782   }
2783 
2784   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2785   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2786   if (reuse == MAT_INITIAL_MATRIX) {
2787     /* create a local sequential matrix matseq[0] */
2788     mloc_sub = PETSC_DECIDE;
2789     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2790     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2791     rstart = rend - mloc_sub;
2792     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2793     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2794   } else { /* reuse == MAT_REUSE_MATRIX */
2795     if (subsize == 1) {
2796       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2797       redund = c->redundant;
2798     } else {
2799       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2800       redund = c->redundant;
2801     }
2802 
2803     isrow  = redund->isrow;
2804     iscol  = redund->iscol;
2805     matseq = redund->matseq;
2806   }
2807   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2808   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2809 
2810   if (reuse == MAT_INITIAL_MATRIX) {
2811     /* create a supporting struct and attach it to C for reuse */
2812     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2813     if (subsize == 1) {
2814       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2815       c->redundant = redund;
2816     } else {
2817       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2818       c->redundant = redund;
2819     }
2820     redund->isrow    = isrow;
2821     redund->iscol    = iscol;
2822     redund->matseq   = matseq;
2823     redund->psubcomm = psubcomm;
2824   }
2825   PetscFunctionReturn(0);
2826 }
2827 
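/*
   A hedged usage sketch (not part of this file): typical use of the public interface that
   dispatches to the routine above; mat and nsubcomm are assumed to already exist, and passing
   MPI_COMM_NULL lets the routine build the subcommunicators itself:

     Mat redundant = NULL;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&redundant);CHKERRQ(ierr);
     ...  change the numerical values of mat (same nonzero pattern) ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&redundant);CHKERRQ(ierr);
*/
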
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2830 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2831 {
2832   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2833   PetscErrorCode ierr;
2834   PetscInt       i,*idxb = 0;
2835   PetscScalar    *va,*vb;
2836   Vec            vtmp;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2840   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2841   if (idx) {
2842     for (i=0; i<A->rmap->n; i++) {
2843       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844     }
2845   }
2846 
2847   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2848   if (idx) {
2849     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2850   }
2851   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2852   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2853 
2854   for (i=0; i<A->rmap->n; i++) {
2855     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856       va[i] = vb[i];
2857       if (idx) idx[i] = a->garray[idxb[i]];
2858     }
2859   }
2860 
2861   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2862   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2863   ierr = PetscFree(idxb);CHKERRQ(ierr);
2864   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2873   PetscErrorCode ierr;
2874   PetscInt       i,*idxb = 0;
2875   PetscScalar    *va,*vb;
2876   Vec            vtmp;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2880   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881   if (idx) {
2882     for (i=0; i<A->rmap->n; i++) {
2883       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884     }
2885   }
2886 
2887   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888   if (idx) {
2889     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890   }
2891   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893 
2894   for (i=0; i<A->rmap->n; i++) {
2895     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896       va[i] = vb[i];
2897       if (idx) idx[i] = a->garray[idxb[i]];
2898     }
2899   }
2900 
2901   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903   ierr = PetscFree(idxb);CHKERRQ(ierr);
2904   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2913   PetscInt       n      = A->rmap->n;
2914   PetscInt       cstart = A->cmap->rstart;
2915   PetscInt       *cmap  = mat->garray;
2916   PetscInt       *diagIdx, *offdiagIdx;
2917   Vec            diagV, offdiagV;
2918   PetscScalar    *a, *diagA, *offdiagA;
2919   PetscInt       r;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2924   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2925   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2926   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2927   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2928   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2929   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2930   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931   for (r = 0; r < n; ++r) {
2932     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933       a[r]   = diagA[r];
2934       idx[r] = cstart + diagIdx[r];
2935     } else {
2936       a[r]   = offdiagA[r];
2937       idx[r] = cmap[offdiagIdx[r]];
2938     }
2939   }
2940   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2944   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2945   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2952 {
2953   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2954   PetscInt       n      = A->rmap->n;
2955   PetscInt       cstart = A->cmap->rstart;
2956   PetscInt       *cmap  = mat->garray;
2957   PetscInt       *diagIdx, *offdiagIdx;
2958   Vec            diagV, offdiagV;
2959   PetscScalar    *a, *diagA, *offdiagA;
2960   PetscInt       r;
2961   PetscErrorCode ierr;
2962 
2963   PetscFunctionBegin;
2964   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2965   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2966   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2967   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2968   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2969   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2970   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2971   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2972   for (r = 0; r < n; ++r) {
2973     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2974       a[r]   = diagA[r];
2975       idx[r] = cstart + diagIdx[r];
2976     } else {
2977       a[r]   = offdiagA[r];
2978       idx[r] = cmap[offdiagIdx[r]];
2979     }
2980   }
2981   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2985   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2986   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2993 {
2994   PetscErrorCode ierr;
2995   Mat            *dummy;
2996 
2997   PetscFunctionBegin;
2998   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2999   *newmat = *dummy;
3000   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3006 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3007 {
3008   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3018 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3019 {
3020   PetscErrorCode ierr;
3021   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3022 
3023   PetscFunctionBegin;
3024   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3025   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3026   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3027   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 /* -------------------------------------------------------------------*/
3032 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3033                                        MatGetRow_MPIAIJ,
3034                                        MatRestoreRow_MPIAIJ,
3035                                        MatMult_MPIAIJ,
3036                                 /* 4*/ MatMultAdd_MPIAIJ,
3037                                        MatMultTranspose_MPIAIJ,
3038                                        MatMultTransposeAdd_MPIAIJ,
3039 #if defined(PETSC_HAVE_PBGL)
3040                                        MatSolve_MPIAIJ,
3041 #else
3042                                        0,
3043 #endif
3044                                        0,
3045                                        0,
3046                                 /*10*/ 0,
3047                                        0,
3048                                        0,
3049                                        MatSOR_MPIAIJ,
3050                                        MatTranspose_MPIAIJ,
3051                                 /*15*/ MatGetInfo_MPIAIJ,
3052                                        MatEqual_MPIAIJ,
3053                                        MatGetDiagonal_MPIAIJ,
3054                                        MatDiagonalScale_MPIAIJ,
3055                                        MatNorm_MPIAIJ,
3056                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3057                                        MatAssemblyEnd_MPIAIJ,
3058                                        MatSetOption_MPIAIJ,
3059                                        MatZeroEntries_MPIAIJ,
3060                                 /*24*/ MatZeroRows_MPIAIJ,
3061                                        0,
3062 #if defined(PETSC_HAVE_PBGL)
3063                                        0,
3064 #else
3065                                        0,
3066 #endif
3067                                        0,
3068                                        0,
3069                                 /*29*/ MatSetUp_MPIAIJ,
3070 #if defined(PETSC_HAVE_PBGL)
3071                                        0,
3072 #else
3073                                        0,
3074 #endif
3075                                        0,
3076                                        0,
3077                                        0,
3078                                 /*34*/ MatDuplicate_MPIAIJ,
3079                                        0,
3080                                        0,
3081                                        0,
3082                                        0,
3083                                 /*39*/ MatAXPY_MPIAIJ,
3084                                        MatGetSubMatrices_MPIAIJ,
3085                                        MatIncreaseOverlap_MPIAIJ,
3086                                        MatGetValues_MPIAIJ,
3087                                        MatCopy_MPIAIJ,
3088                                 /*44*/ MatGetRowMax_MPIAIJ,
3089                                        MatScale_MPIAIJ,
3090                                        0,
3091                                        0,
3092                                        MatZeroRowsColumns_MPIAIJ,
3093                                 /*49*/ MatSetRandom_MPIAIJ,
3094                                        0,
3095                                        0,
3096                                        0,
3097                                        0,
3098                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3099                                        0,
3100                                        MatSetUnfactored_MPIAIJ,
3101                                        MatPermute_MPIAIJ,
3102                                        0,
3103                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3104                                        MatDestroy_MPIAIJ,
3105                                        MatView_MPIAIJ,
3106                                        0,
3107                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3108                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3109                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3110                                        0,
3111                                        0,
3112                                        0,
3113                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3114                                        MatGetRowMinAbs_MPIAIJ,
3115                                        0,
3116                                        MatSetColoring_MPIAIJ,
3117                                        0,
3118                                        MatSetValuesAdifor_MPIAIJ,
3119                                 /*75*/ MatFDColoringApply_AIJ,
3120                                        0,
3121                                        0,
3122                                        0,
3123                                        MatFindZeroDiagonals_MPIAIJ,
3124                                 /*80*/ 0,
3125                                        0,
3126                                        0,
3127                                 /*83*/ MatLoad_MPIAIJ,
3128                                        0,
3129                                        0,
3130                                        0,
3131                                        0,
3132                                        0,
3133                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3134                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3135                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3136                                        MatPtAP_MPIAIJ_MPIAIJ,
3137                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3138                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                        0,
3143                                 /*99*/ 0,
3144                                        0,
3145                                        0,
3146                                        MatConjugate_MPIAIJ,
3147                                        0,
3148                                 /*104*/MatSetValuesRow_MPIAIJ,
3149                                        MatRealPart_MPIAIJ,
3150                                        MatImaginaryPart_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                 /*109*/0,
3154                                        MatGetRedundantMatrix_MPIAIJ,
3155                                        MatGetRowMin_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3159                                        0,
3160                                        0,
3161                                        0,
3162                                        0,
3163                                 /*119*/0,
3164                                        0,
3165                                        0,
3166                                        0,
3167                                        MatGetMultiProcBlock_MPIAIJ,
3168                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3169                                        MatGetColumnNorms_MPIAIJ,
3170                                        MatInvertBlockDiagonal_MPIAIJ,
3171                                        0,
3172                                        MatGetSubMatricesParallel_MPIAIJ,
3173                                 /*129*/0,
3174                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3175                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3176                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3177                                        0,
3178                                 /*134*/0,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                        0,
3183                                 /*139*/0,
3184                                        0,
3185                                        0,
3186                                        MatFDColoringSetUp_MPIXAIJ
3187 };
3188 
3189 /* ----------------------------------------------------------------------------------------*/
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3193 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3194 {
3195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3196   PetscErrorCode ierr;
3197 
3198   PetscFunctionBegin;
3199   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3200   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3206 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3207 {
3208   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3213   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 #undef __FUNCT__
3218 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3219 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3220 {
3221   Mat_MPIAIJ     *b;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3226   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3227   b = (Mat_MPIAIJ*)B->data;
3228 
3229   if (!B->preallocated) {
3230     /* Explicitly create 2 MATSEQAIJ matrices. */
3231     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3232     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3233     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3234     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3235     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3236     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3237     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3238     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3239     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3240     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3241   }
3242 
3243   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3244   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3245   B->preallocated = PETSC_TRUE;
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3251 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3252 {
3253   Mat            mat;
3254   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   *newmat = 0;
3259   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3260   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3261   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3262   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3263   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3264   a       = (Mat_MPIAIJ*)mat->data;
3265 
3266   mat->factortype   = matin->factortype;
3267   mat->assembled    = PETSC_TRUE;
3268   mat->insertmode   = NOT_SET_VALUES;
3269   mat->preallocated = PETSC_TRUE;
3270 
3271   a->size         = oldmat->size;
3272   a->rank         = oldmat->rank;
3273   a->donotstash   = oldmat->donotstash;
3274   a->roworiented  = oldmat->roworiented;
3275   a->rowindices   = 0;
3276   a->rowvalues    = 0;
3277   a->getrowactive = PETSC_FALSE;
3278 
3279   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3280   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3281 
3282   if (oldmat->colmap) {
3283 #if defined(PETSC_USE_CTABLE)
3284     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3285 #else
3286     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3287     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3288     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3289 #endif
3290   } else a->colmap = 0;
3291   if (oldmat->garray) {
3292     PetscInt len;
3293     len  = oldmat->B->cmap->n;
3294     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3295     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3296     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3297   } else a->garray = 0;
3298 
3299   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3300   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3301   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3302   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3303   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3304   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3305   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3306   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3307   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3308   *newmat = mat;
3309   PetscFunctionReturn(0);
3310 }
3311 
3312 
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatLoad_MPIAIJ"
3316 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3317 {
3318   PetscScalar    *vals,*svals;
3319   MPI_Comm       comm;
3320   PetscErrorCode ierr;
3321   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3322   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
3323   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3324   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3325   PetscInt       cend,cstart,n,*rowners;
3326   int            fd;
3327   PetscInt       bs = newMat->rmap->bs;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3331   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3332   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3333   if (!rank) {
3334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3335     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3336     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3337   }
3338 
3339   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3340   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3341   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3342   if (bs < 0) bs = 1;
3343 
3344   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3345   M    = header[1]; N = header[2];
3346 
3347   /* If global sizes are set, check if they are consistent with that given in the file */
3348   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3349   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3350 
3351   /* determine ownership of all (block) rows */
3352   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3353   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3354   else m = newMat->rmap->n; /* Set by user */
3355 
3356   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3357   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3358 
3359   /* First process needs enough room for process with most rows */
3360   if (!rank) {
3361     mmax = rowners[1];
3362     for (i=2; i<=size; i++) {
3363       mmax = PetscMax(mmax, rowners[i]);
3364     }
3365   } else mmax = -1;             /* unused, but compilers complain */
3366 
3367   rowners[0] = 0;
3368   for (i=2; i<=size; i++) {
3369     rowners[i] += rowners[i-1];
3370   }
3371   rstart = rowners[rank];
3372   rend   = rowners[rank+1];
3373 
3374   /* distribute row lengths to all processors */
3375   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3376   if (!rank) {
3377     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3378     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3379     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3380     for (j=0; j<m; j++) {
3381       procsnz[0] += ourlens[j];
3382     }
3383     for (i=1; i<size; i++) {
3384       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3385       /* calculate the number of nonzeros on each processor */
3386       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3387         procsnz[i] += rowlengths[j];
3388       }
3389       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3390     }
3391     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3392   } else {
3393     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3394   }
3395 
3396   if (!rank) {
3397     /* determine max buffer needed and allocate it */
3398     maxnz = 0;
3399     for (i=0; i<size; i++) {
3400       maxnz = PetscMax(maxnz,procsnz[i]);
3401     }
3402     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3403 
3404     /* read in my part of the matrix column indices  */
3405     nz   = procsnz[0];
3406     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3407     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3408 
3409     /* read in everyone else's and ship off */
3410     for (i=1; i<size; i++) {
3411       nz   = procsnz[i];
3412       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3413       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3414     }
3415     ierr = PetscFree(cols);CHKERRQ(ierr);
3416   } else {
3417     /* determine buffer space needed for message */
3418     nz = 0;
3419     for (i=0; i<m; i++) {
3420       nz += ourlens[i];
3421     }
3422     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3423 
3424     /* receive message of column indices*/
3425     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3426   }
3427 
3428   /* determine column ownership if matrix is not square */
3429   if (N != M) {
3430     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3431     else n = newMat->cmap->n;
3432     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3433     cstart = cend - n;
3434   } else {
3435     cstart = rstart;
3436     cend   = rend;
3437     n      = cend - cstart;
3438   }
3439 
3440   /* loop over local rows, determining number of off diagonal entries */
3441   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3442   jj   = 0;
3443   for (i=0; i<m; i++) {
3444     for (j=0; j<ourlens[i]; j++) {
3445       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3446       jj++;
3447     }
3448   }
3449 
3450   for (i=0; i<m; i++) {
3451     ourlens[i] -= offlens[i];
3452   }
3453   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3454 
3455   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3456 
3457   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3458 
3459   for (i=0; i<m; i++) {
3460     ourlens[i] += offlens[i];
3461   }
3462 
3463   if (!rank) {
3464     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3465 
3466     /* read in my part of the matrix numerical values  */
3467     nz   = procsnz[0];
3468     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3469 
3470     /* insert into matrix */
3471     jj      = rstart;
3472     smycols = mycols;
3473     svals   = vals;
3474     for (i=0; i<m; i++) {
3475       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3476       smycols += ourlens[i];
3477       svals   += ourlens[i];
3478       jj++;
3479     }
3480 
3481     /* read in other processors and ship out */
3482     for (i=1; i<size; i++) {
3483       nz   = procsnz[i];
3484       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3485       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3486     }
3487     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3488   } else {
3489     /* receive numeric values */
3490     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3491 
3492     /* receive message of values*/
3493     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3494 
3495     /* insert into matrix */
3496     jj      = rstart;
3497     smycols = mycols;
3498     svals   = vals;
3499     for (i=0; i<m; i++) {
3500       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3501       smycols += ourlens[i];
3502       svals   += ourlens[i];
3503       jj++;
3504     }
3505   }
3506   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3507   ierr = PetscFree(vals);CHKERRQ(ierr);
3508   ierr = PetscFree(mycols);CHKERRQ(ierr);
3509   ierr = PetscFree(rowners);CHKERRQ(ierr);
3510   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3511   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3512   PetscFunctionReturn(0);
3513 }
3514 
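/*
   A hedged usage sketch (not part of this file): loading an MPIAIJ matrix stored in PETSc
   binary format through the generic MatLoad() interface, which ends up in the routine above.
   The file name is a placeholder:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
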
3515 #undef __FUNCT__
3516 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3517 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3518 {
3519   PetscErrorCode ierr;
3520   IS             iscol_local;
3521   PetscInt       csize;
3522 
3523   PetscFunctionBegin;
3524   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3525   if (call == MAT_REUSE_MATRIX) {
3526     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3527     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3528   } else {
3529     PetscInt cbs;
3530     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3531     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3532     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3533   }
3534   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3535   if (call == MAT_INITIAL_MATRIX) {
3536     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3537     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3538   }
3539   PetscFunctionReturn(0);
3540 }
3541 
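/*
   A hedged usage sketch (not part of this file) of the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX
   pattern handled by MatGetSubMatrix_MPIAIJ() above; A, isrow and iscol are assumed to exist:

     Mat sub = NULL;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...  change the numerical values of A (same nonzero pattern) ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   The reuse call requires sub to come from an earlier MAT_INITIAL_MATRIX call on the same
   index sets, since the composed "ISAllGather" object is queried above.
*/
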
3542 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3545 /*
3546     Not great since it makes two copies of the submatrix: first a SeqAIJ submatrix
3547   on each process, and then the final parallel matrix by concatenating the local pieces.
3548   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3549 
3550   Note: This requires a sequential iscol with all indices.
3551 */
3552 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3553 {
3554   PetscErrorCode ierr;
3555   PetscMPIInt    rank,size;
3556   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3557   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3558   PetscBool      allcolumns, colflag;
3559   Mat            M,Mreuse;
3560   MatScalar      *vwork,*aa;
3561   MPI_Comm       comm;
3562   Mat_SeqAIJ     *aij;
3563 
3564   PetscFunctionBegin;
3565   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3566   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3567   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3568 
3569   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3570   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3571   if (colflag && ncol == mat->cmap->N) {
3572     allcolumns = PETSC_TRUE;
3573   } else {
3574     allcolumns = PETSC_FALSE;
3575   }
3576   if (call ==  MAT_REUSE_MATRIX) {
3577     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3578     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3579     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3580   } else {
3581     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3582   }
3583 
3584   /*
3585       m - number of local rows
3586       n - number of columns (same on all processors)
3587       rstart - first row in new global matrix generated
3588   */
3589   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3590   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3591   if (call == MAT_INITIAL_MATRIX) {
3592     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3593     ii  = aij->i;
3594     jj  = aij->j;
3595 
3596     /*
3597         Determine the number of non-zeros in the diagonal and off-diagonal
3598         portions of the matrix in order to do correct preallocation
3599     */
3600 
3601     /* first get start and end of "diagonal" columns */
3602     if (csize == PETSC_DECIDE) {
3603       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3604       if (mglobal == n) { /* square matrix */
3605         nlocal = m;
3606       } else {
3607         nlocal = n/size + ((n % size) > rank);
3608       }
3609     } else {
3610       nlocal = csize;
3611     }
3612     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3613     rstart = rend - nlocal;
3614     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3615 
3616     /* next, compute all the lengths */
3617     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3618     olens = dlens + m;
3619     for (i=0; i<m; i++) {
3620       jend = ii[i+1] - ii[i];
3621       olen = 0;
3622       dlen = 0;
3623       for (j=0; j<jend; j++) {
3624         if (*jj < rstart || *jj >= rend) olen++;
3625         else dlen++;
3626         jj++;
3627       }
3628       olens[i] = olen;
3629       dlens[i] = dlen;
3630     }
3631     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3632     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3633     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3634     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3635     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3636     ierr = PetscFree(dlens);CHKERRQ(ierr);
3637   } else {
3638     PetscInt ml,nl;
3639 
3640     M    = *newmat;
3641     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3642     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3643     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3644     /*
3645          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3646        rather than the slower MatSetValues().
3647     */
3648     M->was_assembled = PETSC_TRUE;
3649     M->assembled     = PETSC_FALSE;
3650   }
3651   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3652   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3653   ii   = aij->i;
3654   jj   = aij->j;
3655   aa   = aij->a;
3656   for (i=0; i<m; i++) {
3657     row   = rstart + i;
3658     nz    = ii[i+1] - ii[i];
3659     cwork = jj;     jj += nz;
3660     vwork = aa;     aa += nz;
3661     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3662   }
3663 
3664   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3665   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3666   *newmat = M;
3667 
3668   /* save submatrix used in processor for next request */
3669   if (call ==  MAT_INITIAL_MATRIX) {
3670     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3671     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3672   }
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3678 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3679 {
3680   PetscInt       m,cstart, cend,j,nnz,i,d;
3681   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3682   const PetscInt *JJ;
3683   PetscScalar    *values;
3684   PetscErrorCode ierr;
3685 
3686   PetscFunctionBegin;
3687   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3688 
3689   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3690   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3691   m      = B->rmap->n;
3692   cstart = B->cmap->rstart;
3693   cend   = B->cmap->rend;
3694   rstart = B->rmap->rstart;
3695 
3696   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3697 
3698 #if defined(PETSC_USE_DEBUG)
3699   for (i=0; i<m; i++) {
3700     nnz = Ii[i+1]- Ii[i];
3701     JJ  = J + Ii[i];
3702     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3703     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3704     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3705   }
3706 #endif
3707 
3708   for (i=0; i<m; i++) {
3709     nnz     = Ii[i+1]- Ii[i];
3710     JJ      = J + Ii[i];
3711     nnz_max = PetscMax(nnz_max,nnz);
3712     d       = 0;
3713     for (j=0; j<nnz; j++) {
3714       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3715     }
3716     d_nnz[i] = d;
3717     o_nnz[i] = nnz - d;
3718   }
3719   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3720   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3721 
3722   if (v) values = (PetscScalar*)v;
3723   else {
3724     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3725   }
3726 
3727   for (i=0; i<m; i++) {
3728     ii   = i + rstart;
3729     nnz  = Ii[i+1]- Ii[i];
3730     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3731   }
3732   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3733   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3734 
3735   if (!v) {
3736     ierr = PetscFree(values);CHKERRQ(ierr);
3737   }
3738   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 #undef __FUNCT__
3743 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3744 /*@
3745    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3746    (the default parallel PETSc format).
3747 
3748    Collective on MPI_Comm
3749 
3750    Input Parameters:
3751 +  B - the matrix
3752 .  i - the indices into j for the start of each local row (starts with zero)
3753 .  j - the column indices for each local row (starts with zero)
3754 -  v - optional values in the matrix
3755 
3756    Level: developer
3757 
3758    Notes:
3759        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3760      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3761      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3762 
3763        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3764        The i and j indices are 0-based, and the i indices refer to positions in the local j array.
3765        The format used for the sparse matrix input is equivalent to a
3766     row-major ordering, i.e., for the following matrix, the input data expected is
3767     as shown:
3768 
3769         1 0 0
3770         2 0 3     P0
3771        -------
3772         4 5 6     P1
3773 
3774      Process0 [P0]: rows_owned=[0,1]
3775         i =  {0,1,3}  [size = nrow+1  = 2+1]
3776         j =  {0,0,2}  [size = nz = 3]
3777         v =  {1,2,3}  [size = nz = 3]
3778 
3779      Process1 [P1]: rows_owned=[2]
3780         i =  {0,3}    [size = nrow+1  = 1+1]
3781         j =  {0,1,2}  [size = nz = 3]
3782         v =  {4,5,6}  [size = nz = 3]
3783 
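     A minimal call-sequence sketch for the example above (illustrative only: comm and
   nlocalrows are placeholders, and i, j, v are the per-process arrays listed above):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve

     On return the matrix is preallocated, filled and assembled
   (see MatMPIAIJSetPreallocationCSR_MPIAIJ() above).
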
3784 .keywords: matrix, aij, compressed row, sparse, parallel
3785 
3786 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3787           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3788 @*/
3789 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3790 {
3791   PetscErrorCode ierr;
3792 
3793   PetscFunctionBegin;
3794   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3795   PetscFunctionReturn(0);
3796 }
3797 
3798 #undef __FUNCT__
3799 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3800 /*@C
3801    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3802    (the default parallel PETSc format).  For good matrix assembly performance
3803    the user should preallocate the matrix storage by setting the parameters
3804    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3805    performance can be increased by more than a factor of 50.
3806 
3807    Collective on MPI_Comm
3808 
3809    Input Parameters:
3810 +  B - the matrix
3811 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3812            (same value is used for all local rows)
3813 .  d_nnz - array containing the number of nonzeros in the various rows of the
3814            DIAGONAL portion of the local submatrix (possibly different for each row)
3815            or NULL, if d_nz is used to specify the nonzero structure.
3816            The size of this array is equal to the number of local rows, i.e 'm'.
3817            For matrices that will be factored, you must leave room for (and set)
3818            the diagonal entry even if it is zero.
3819 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3820            submatrix (same value is used for all local rows).
3821 -  o_nnz - array containing the number of nonzeros in the various rows of the
3822            OFF-DIAGONAL portion of the local submatrix (possibly different for
3823            each row) or NULL, if o_nz is used to specify the nonzero
3824            structure. The size of this array is equal to the number
3825            of local rows, i.e 'm'.
3826 
3827    If the *_nnz parameter is given then the *_nz parameter is ignored
3828 
3829    The AIJ format (also called the Yale sparse matrix format or
3830    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3831    storage.  The stored row and column indices begin with zero.
3832    See Users-Manual: ch_mat for details.
3833 
3834    The parallel matrix is partitioned such that the first m0 rows belong to
3835    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3836    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3837 
3838    The DIAGONAL portion of the local submatrix of a processor can be defined
3839    as the submatrix which is obtained by extracting the part corresponding to
3840    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3841    first row that belongs to the processor, r2 is the last row belonging to
3842    this processor, and c1-c2 is the range of indices of the local part of a
3843    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3844    common case of a square matrix, the row and column ranges are the same and
3845    the DIAGONAL part is also square. The remaining portion of the local
3846    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3847 
3848    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3849 
3850    You can call MatGetInfo() to get information on how effective the preallocation was;
3851    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3852    You can also run with the option -info and look for messages with the string
3853    malloc in them to see if additional memory allocation was needed.
3854 
3855    Example usage:
3856 
3857    Consider the following 8x8 matrix with 34 non-zero values that is
3858    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3859    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3860    as follows:
3861 
3862 .vb
3863             1  2  0  |  0  3  0  |  0  4
3864     Proc0   0  5  6  |  7  0  0  |  8  0
3865             9  0 10  | 11  0  0  | 12  0
3866     -------------------------------------
3867            13  0 14  | 15 16 17  |  0  0
3868     Proc1   0 18  0  | 19 20 21  |  0  0
3869             0  0  0  | 22 23  0  | 24  0
3870     -------------------------------------
3871     Proc2  25 26 27  |  0  0 28  | 29  0
3872            30  0  0  | 31 32 33  |  0 34
3873 .ve
3874 
3875    This can be represented as a collection of submatrices as:
3876 
3877 .vb
3878       A B C
3879       D E F
3880       G H I
3881 .ve
3882 
3883    Where the submatrices A,B,C are owned by proc0, D,E,F are
3884    owned by proc1, G,H,I are owned by proc2.
3885 
3886    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3887    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3888    The 'M','N' parameters are 8,8, and have the same values on all procs.
3889 
3890    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3891    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3892    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3893    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3894    part as SeqAIJ matrices; for example, proc1 will store [E] as a SeqAIJ
3895    matrix, and [DF] as another SeqAIJ matrix.
3896 
3897    When d_nz, o_nz parameters are specified, d_nz storage elements are
3898    allocated for every row of the local diagonal submatrix, and o_nz
3899    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3900    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3901    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3902    In this case, the values of d_nz,o_nz are:
3903 .vb
3904      proc0 : dnz = 2, o_nz = 2
3905      proc1 : dnz = 3, o_nz = 2
3906      proc2 : dnz = 1, o_nz = 4
3907 .ve
3908    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3909    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3910    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3911    34 values.
3912 
3913    When d_nnz, o_nnz parameters are specified, the storage is specified
3914    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3915    In the above case the values for d_nnz,o_nnz are:
3916 .vb
3917      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3918      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3919      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3920 .ve
3921    Here the space allocated is the sum of all the above values, i.e. 34, and
3922    hence the preallocation is perfect.
3923 
3924    Level: intermediate
3925 
3926 .keywords: matrix, aij, compressed row, sparse, parallel
3927 
3928 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3929           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3930 @*/
3931 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3932 {
3933   PetscErrorCode ierr;
3934 
3935   PetscFunctionBegin;
3936   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3937   PetscValidType(B,1);
3938   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3939   PetscFunctionReturn(0);
3940 }
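
/*
   Example (a minimal sketch): preallocating the 8x8 example from the manual page
   above on process 0, which owns three rows.  B is assumed to already be an
   MPIAIJ matrix of the right sizes; ierr is a PetscErrorCode.

     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   If only a uniform upper bound per row is known, the scalar form can be used
   instead, in which case the arrays are passed as NULL:

     ierr = MatMPIAIJSetPreallocation(B,2,NULL,2,NULL);CHKERRQ(ierr);
*/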
3941 
3942 #undef __FUNCT__
3943 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3944 /*@
3945      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3946          in standard CSR format.
3947 
3948    Collective on MPI_Comm
3949 
3950    Input Parameters:
3951 +  comm - MPI communicator
3952 .  m - number of local rows (Cannot be PETSC_DECIDE)
3953 .  n - This value should be the same as the local size used in creating the
3954        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3955        it calculated if N is given). For square matrices n is almost always m.
3956 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3957 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3958 .   i - row indices
3959 .   j - column indices
3960 -   a - matrix values
3961 
3962    Output Parameter:
3963 .   mat - the matrix
3964 
3965    Level: intermediate
3966 
3967    Notes:
3968        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3969      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3970      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3971 
3972        The i and j indices are 0-based, and the i indices are row offsets into the local j array.
3973 
3974        The format used for the sparse matrix input is equivalent to a
3975     row-major ordering, i.e. for the following matrix, the input data expected is
3976     as shown:
3977 
3978         1 0 0
3979         2 0 3     P0
3980        -------
3981         4 5 6     P1
3982 
3983      Process0 [P0]: rows_owned=[0,1]
3984         i =  {0,1,3}  [size = nrow+1  = 2+1]
3985         j =  {0,0,2}  [size = nz = 3]
3986         v =  {1,2,3}  [size = nz = 3]
3987 
3988      Process1 [P1]: rows_owned=[2]
3989         i =  {0,3}    [size = nrow+1  = 1+1]
3990         j =  {0,1,2}  [size = nz = 3]
3991         v =  {4,5,6}  [size = nz = 3]
3992 
3993 .keywords: matrix, aij, compressed row, sparse, parallel
3994 
3995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3996           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3997 @*/
3998 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3999 {
4000   PetscErrorCode ierr;
4001 
4002   PetscFunctionBegin;
4003   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4004   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4005   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4006   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4007   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4008   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4009   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
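
/*
   Example (a minimal sketch): building the 3x3 matrix from the manual page above
   on two MPI processes.  comm, rank and ierr are assumed to be provided by the
   caller; the arrays are copied, so they may be stack temporaries.

     Mat A;
     if (!rank) {
       const PetscInt    i[] = {0,1,3},j[] = {0,0,2};
       const PetscScalar v[] = {1,2,3};
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     } else {
       const PetscInt    i[] = {0,3},j[] = {0,1,2};
       const PetscScalar v[] = {4,5,6};
       ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     }
*/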
4012 
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatCreateAIJ"
4015 /*@C
4016    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4017    (the default parallel PETSc format).  For good matrix assembly performance
4018    the user should preallocate the matrix storage by setting the parameters
4019    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4020    performance can be increased by more than a factor of 50.
4021 
4022    Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +  comm - MPI communicator
4026 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4027            This value should be the same as the local size used in creating the
4028            y vector for the matrix-vector product y = Ax.
4029 .  n - This value should be the same as the local size used in creating the
4030        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4031        it calculated if N is given). For square matrices n is almost always m.
4032 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4033 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4034 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4035            (same value is used for all local rows)
4036 .  d_nnz - array containing the number of nonzeros in the various rows of the
4037            DIAGONAL portion of the local submatrix (possibly different for each row)
4038            or NULL, if d_nz is used to specify the nonzero structure.
4039            The size of this array is equal to the number of local rows, i.e 'm'.
4040 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4041            submatrix (same value is used for all local rows).
4042 -  o_nnz - array containing the number of nonzeros in the various rows of the
4043            OFF-DIAGONAL portion of the local submatrix (possibly different for
4044            each row) or NULL, if o_nz is used to specify the nonzero
4045            structure. The size of this array is equal to the number
4046            of local rows, i.e 'm'.
4047 
4048    Output Parameter:
4049 .  A - the matrix
4050 
4051    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4052    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4053    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4054 
4055    Notes:
4056    If the *_nnz parameter is given then the *_nz parameter is ignored
4057 
4058    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4059    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4060    storage requirements for this matrix.
4061 
4062    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4063    processor, then it must be used on all processors that share the object for
4064    that argument.
4065 
4066    The user MUST specify either the local or global matrix dimensions
4067    (possibly both).
4068 
4069    The parallel matrix is partitioned across processors such that the
4070    first m0 rows belong to process 0, the next m1 rows belong to
4071    process 1, the next m2 rows belong to process 2, etc., where
4072    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4073    values corresponding to an [m x N] submatrix.
4074 
4075    The columns are logically partitioned with the n0 columns belonging
4076    to the 0th partition, the next n1 columns belonging to the next
4077    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4078 
4079    The DIAGONAL portion of the local submatrix on any given processor
4080    is the submatrix corresponding to the rows and columns m,n
4081    owned by the given processor, i.e. the diagonal matrix on
4082    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4083    etc. The remaining portion of the local submatrix [m x (N-n)]
4084    constitutes the OFF-DIAGONAL portion. The example below better
4085    illustrates this concept.
4086 
4087    For a square global matrix we define each processor's diagonal portion
4088    to be its local rows and the corresponding columns (a square submatrix);
4089    each processor's off-diagonal portion encompasses the remainder of the
4090    local matrix (a rectangular submatrix).
4091 
4092    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4093 
4094    When calling this routine with a single process communicator, a matrix of
4095    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4096    type of communicator, use the construction mechanism:
4097      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4098 
4099    By default, this format uses inodes (identical nodes) when possible.
4100    We search for consecutive rows with the same nonzero structure, thereby
4101    reusing matrix information to achieve increased efficiency.
4102 
4103    Options Database Keys:
4104 +  -mat_no_inode  - Do not use inodes
4105 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4106 -  -mat_aij_oneindex - Internally use indexing starting at 1
4107         rather than 0.  Note that when calling MatSetValues(),
4108         the user still MUST index entries starting at 0!
4109 
4110 
4111    Example usage:
4112 
4113    Consider the following 8x8 matrix with 34 non-zero values that is
4114    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4115    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4116    as follows:
4117 
4118 .vb
4119             1  2  0  |  0  3  0  |  0  4
4120     Proc0   0  5  6  |  7  0  0  |  8  0
4121             9  0 10  | 11  0  0  | 12  0
4122     -------------------------------------
4123            13  0 14  | 15 16 17  |  0  0
4124     Proc1   0 18  0  | 19 20 21  |  0  0
4125             0  0  0  | 22 23  0  | 24  0
4126     -------------------------------------
4127     Proc2  25 26 27  |  0  0 28  | 29  0
4128            30  0  0  | 31 32 33  |  0 34
4129 .ve
4130 
4131    This can be represented as a collection of submatrices as:
4132 
4133 .vb
4134       A B C
4135       D E F
4136       G H I
4137 .ve
4138 
4139    Where the submatrices A,B,C are owned by proc0, D,E,F are
4140    owned by proc1, G,H,I are owned by proc2.
4141 
4142    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4143    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4144    The 'M','N' parameters are 8,8, and have the same values on all procs.
4145 
4146    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4147    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4148    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4149    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4150    part as SeqAIJ matrices; for example, proc1 will store [E] as a SeqAIJ
4151    matrix, and [DF] as another SeqAIJ matrix.
4152 
4153    When d_nz, o_nz parameters are specified, d_nz storage elements are
4154    allocated for every row of the local diagonal submatrix, and o_nz
4155    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4156    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4157    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4158    In this case, the values of d_nz,o_nz are:
4159 .vb
4160      proc0 : dnz = 2, o_nz = 2
4161      proc1 : dnz = 3, o_nz = 2
4162      proc2 : dnz = 1, o_nz = 4
4163 .ve
4164    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4165    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4166    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4167    34 values.
4168 
4169    When d_nnz, o_nnz parameters are specified, the storage is specified
4170    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4171    In the above case the values for d_nnz,o_nnz are:
4172 .vb
4173      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4174      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4175      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4176 .ve
4177    Here the space allocated is the sum of all the above values, i.e. 34, and
4178    hence the preallocation is perfect.
4179 
4180    Level: intermediate
4181 
4182 .keywords: matrix, aij, compressed row, sparse, parallel
4183 
4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4185           MPIAIJ, MatCreateMPIAIJWithArrays()
4186 @*/
4187 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4188 {
4189   PetscErrorCode ierr;
4190   PetscMPIInt    size;
4191 
4192   PetscFunctionBegin;
4193   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4194   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4196   if (size > 1) {
4197     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4198     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4199   } else {
4200     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4201     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4202   }
4203   PetscFunctionReturn(0);
4204 }
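
/*
   Example (a minimal sketch): the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   paradigm recommended in the manual page above, which is equivalent to calling
   MatCreateAIJ() directly.  mlocal, nlocal, d_nnz and o_nnz are placeholders for
   application-provided values; ierr is a PetscErrorCode.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   Both preallocation calls are safe here: each one takes effect only if the
   matrix has the corresponding type.
*/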
4205 
4206 #undef __FUNCT__
4207 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4208 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4209 {
4210   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4211 
4212   PetscFunctionBegin;
4213   if (Ad)     *Ad     = a->A;
4214   if (Ao)     *Ao     = a->B;
4215   if (colmap) *colmap = a->garray;
4216   PetscFunctionReturn(0);
4217 }
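
/*
   Example (a minimal sketch): retrieving the two sequential blocks of an MPIAIJ
   matrix A.  Ad holds the DIAGONAL block, Ao the OFF-DIAGONAL block, and colmap
   maps Ao's compressed local column indices back to global column numbers.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/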
4218 
4219 #undef __FUNCT__
4220 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4221 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4222 {
4223   PetscErrorCode ierr;
4224   PetscInt       i;
4225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4226 
4227   PetscFunctionBegin;
4228   if (coloring->ctype == IS_COLORING_GLOBAL) {
4229     ISColoringValue *allcolors,*colors;
4230     ISColoring      ocoloring;
4231 
4232     /* set coloring for diagonal portion */
4233     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4234 
4235     /* set coloring for off-diagonal portion */
4236     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4237     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4238     for (i=0; i<a->B->cmap->n; i++) {
4239       colors[i] = allcolors[a->garray[i]];
4240     }
4241     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4242     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4243     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4244     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4245   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4246     ISColoringValue *colors;
4247     PetscInt        *larray;
4248     ISColoring      ocoloring;
4249 
4250     /* set coloring for diagonal portion */
4251     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4252     for (i=0; i<a->A->cmap->n; i++) {
4253       larray[i] = i + A->cmap->rstart;
4254     }
4255     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4256     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4257     for (i=0; i<a->A->cmap->n; i++) {
4258       colors[i] = coloring->colors[larray[i]];
4259     }
4260     ierr = PetscFree(larray);CHKERRQ(ierr);
4261     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4262     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4263     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4264 
4265     /* set coloring for off-diagonal portion */
4266     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4267     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4268     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4269     for (i=0; i<a->B->cmap->n; i++) {
4270       colors[i] = coloring->colors[larray[i]];
4271     }
4272     ierr = PetscFree(larray);CHKERRQ(ierr);
4273     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4274     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4275     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4276   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4277   PetscFunctionReturn(0);
4278 }
4279 
4280 #undef __FUNCT__
4281 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4282 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4283 {
4284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4285   PetscErrorCode ierr;
4286 
4287   PetscFunctionBegin;
4288   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4289   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4290   PetscFunctionReturn(0);
4291 }
4292 
4293 #undef __FUNCT__
4294 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4295 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4296 {
4297   PetscErrorCode ierr;
4298   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4299   PetscInt       *indx;
4300 
4301   PetscFunctionBegin;
4302   /* This routine will ONLY return MPIAIJ type matrix */
4303   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4304   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4305   if (n == PETSC_DECIDE) {
4306     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4307   }
4308   /* Check sum(n) = N */
4309   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4310   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4311 
4312   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4313   rstart -= m;
4314 
4315   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4316   for (i=0; i<m; i++) {
4317     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4318     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4319     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4320   }
4321 
4322   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4323   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4324   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4325   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4326   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4327   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4333 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       m,N,i,rstart,nnz,Ii;
4337   PetscInt       *indx;
4338   PetscScalar    *values;
4339 
4340   PetscFunctionBegin;
4341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4342   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4343   for (i=0; i<m; i++) {
4344     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4345     Ii   = i + rstart;
4346     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4347     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4348   }
4349   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4350   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353 
4354 #undef __FUNCT__
4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4356 /*@
4357       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4358                  matrices from each processor
4359 
4360     Collective on MPI_Comm
4361 
4362    Input Parameters:
4363 +    comm - the communicator the parallel matrix will live on
4364 .    inmat - the input sequential matrix on each process
4365 .    n - number of local columns (or PETSC_DECIDE)
4366 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4367 
4368    Output Parameter:
4369 .    outmat - the parallel matrix generated
4370 
4371     Level: advanced
4372 
4373    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4374 
4375 @*/
4376 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4377 {
4378   PetscErrorCode ierr;
4379   PetscMPIInt    size;
4380 
4381   PetscFunctionBegin;
4382   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4383   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4384   if (size == 1) {
4385     if (scall == MAT_INITIAL_MATRIX) {
4386       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4387     } else {
4388       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4389     }
4390   } else {
4391     if (scall == MAT_INITIAL_MATRIX) {
4392       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4393     }
4394     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4395   }
4396   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4397   PetscFunctionReturn(0);
4398 }
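
/*
   Example (a minimal sketch): stacking each process's sequential matrix on top of
   one another into a single parallel matrix.  Aseq is a rank-local SeqAIJ matrix
   with the same number of columns on every process; the column split is left to
   PETSc via PETSC_DECIDE.

     Mat Ampi;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
*/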
4399 
4400 #undef __FUNCT__
4401 #define __FUNCT__ "MatFileSplit"
4402 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4403 {
4404   PetscErrorCode    ierr;
4405   PetscMPIInt       rank;
4406   PetscInt          m,N,i,rstart,nnz;
4407   size_t            len;
4408   const PetscInt    *indx;
4409   PetscViewer       out;
4410   char              *name;
4411   Mat               B;
4412   const PetscScalar *values;
4413 
4414   PetscFunctionBegin;
4415   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4416   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4417   /* Should this be the type of the diagonal block of A? */
4418   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4419   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4420   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4421   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4422   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4423   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4424   for (i=0; i<m; i++) {
4425     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4426     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4427     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4428   }
4429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4431 
4432   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4433   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4434   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4435   sprintf(name,"%s.%d",outfile,rank);
4436   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4437   ierr = PetscFree(name);CHKERRQ(ierr);
4438   ierr = MatView(B,out);CHKERRQ(ierr);
4439   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4440   ierr = MatDestroy(&B);CHKERRQ(ierr);
4441   PetscFunctionReturn(0);
4442 }
4443 
4444 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4447 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4448 {
4449   PetscErrorCode      ierr;
4450   Mat_Merge_SeqsToMPI *merge;
4451   PetscContainer      container;
4452 
4453   PetscFunctionBegin;
4454   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4455   if (container) {
4456     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4457     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4458     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4459     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4460     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4469     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4470     ierr = PetscFree(merge);CHKERRQ(ierr);
4471     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4472   }
4473   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 #include <../src/mat/utils/freespace.h>
4478 #include <petscbt.h>
4479 
4480 #undef __FUNCT__
4481 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4482 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4483 {
4484   PetscErrorCode      ierr;
4485   MPI_Comm            comm;
4486   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4487   PetscMPIInt         size,rank,taga,*len_s;
4488   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4489   PetscInt            proc,m;
4490   PetscInt            **buf_ri,**buf_rj;
4491   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4492   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4493   MPI_Request         *s_waits,*r_waits;
4494   MPI_Status          *status;
4495   MatScalar           *aa=a->a;
4496   MatScalar           **abuf_r,*ba_i;
4497   Mat_Merge_SeqsToMPI *merge;
4498   PetscContainer      container;
4499 
4500   PetscFunctionBegin;
4501   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4502   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4503 
4504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4505   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4506 
4507   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4508   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4509 
4510   bi     = merge->bi;
4511   bj     = merge->bj;
4512   buf_ri = merge->buf_ri;
4513   buf_rj = merge->buf_rj;
4514 
4515   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4516   owners = merge->rowmap->range;
4517   len_s  = merge->len_s;
4518 
4519   /* send and recv matrix values */
4520   /*-----------------------------*/
4521   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4522   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4523 
4524   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4525   for (proc=0,k=0; proc<size; proc++) {
4526     if (!len_s[proc]) continue;
4527     i    = owners[proc];
4528     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4529     k++;
4530   }
4531 
4532   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4533   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4534   ierr = PetscFree(status);CHKERRQ(ierr);
4535 
4536   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4537   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4538 
4539   /* insert mat values of mpimat */
4540   /*----------------------------*/
4541   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4542   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4543 
4544   for (k=0; k<merge->nrecv; k++) {
4545     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4546     nrows       = *(buf_ri_k[k]);
4547     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4548     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4549   }
4550 
4551   /* set values of ba */
4552   m = merge->rowmap->n;
4553   for (i=0; i<m; i++) {
4554     arow = owners[rank] + i;
4555     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4556     bnzi = bi[i+1] - bi[i];
4557     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4558 
4559     /* add local non-zero vals of this proc's seqmat into ba */
4560     anzi   = ai[arow+1] - ai[arow];
4561     aj     = a->j + ai[arow];
4562     aa     = a->a + ai[arow];
4563     nextaj = 0;
4564     for (j=0; nextaj<anzi; j++) {
4565       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4566         ba_i[j] += aa[nextaj++];
4567       }
4568     }
4569 
4570     /* add received vals into ba */
4571     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4572       /* i-th row */
4573       if (i == *nextrow[k]) {
4574         anzi   = *(nextai[k]+1) - *nextai[k];
4575         aj     = buf_rj[k] + *(nextai[k]);
4576         aa     = abuf_r[k] + *(nextai[k]);
4577         nextaj = 0;
4578         for (j=0; nextaj<anzi; j++) {
4579           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4580             ba_i[j] += aa[nextaj++];
4581           }
4582         }
4583         nextrow[k]++; nextai[k]++;
4584       }
4585     }
4586     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4587   }
4588   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590 
4591   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4592   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4593   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4594   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4595   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4600 
4601 #undef __FUNCT__
4602 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4603 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4604 {
4605   PetscErrorCode      ierr;
4606   Mat                 B_mpi;
4607   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4608   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4609   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4610   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4611   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4612   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4613   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4614   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4615   MPI_Status          *status;
4616   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4617   PetscBT             lnkbt;
4618   Mat_Merge_SeqsToMPI *merge;
4619   PetscContainer      container;
4620 
4621   PetscFunctionBegin;
4622   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4623 
4624   /* make sure it is a PETSc comm */
4625   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4627   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4628 
4629   ierr = PetscNew(&merge);CHKERRQ(ierr);
4630   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4631 
4632   /* determine row ownership */
4633   /*---------------------------------------------------------*/
4634   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4635   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4636   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4637   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4638   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4639   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4640   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4641 
4642   m      = merge->rowmap->n;
4643   owners = merge->rowmap->range;
4644 
4645   /* determine the number of messages to send, their lengths */
4646   /*---------------------------------------------------------*/
4647   len_s = merge->len_s;
4648 
4649   len          = 0; /* length of buf_si[] */
4650   merge->nsend = 0;
4651   for (proc=0; proc<size; proc++) {
4652     len_si[proc] = 0;
4653     if (proc == rank) {
4654       len_s[proc] = 0;
4655     } else {
4656       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4657       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4658     }
4659     if (len_s[proc]) {
4660       merge->nsend++;
4661       nrows = 0;
4662       for (i=owners[proc]; i<owners[proc+1]; i++) {
4663         if (ai[i+1] > ai[i]) nrows++;
4664       }
4665       len_si[proc] = 2*(nrows+1);
4666       len         += len_si[proc];
4667     }
4668   }
4669 
4670   /* determine the number and length of messages to receive for ij-structure */
4671   /*-------------------------------------------------------------------------*/
4672   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4673   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4674 
4675   /* post the Irecv of j-structure */
4676   /*-------------------------------*/
4677   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4678   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4679 
4680   /* post the Isend of j-structure */
4681   /*--------------------------------*/
4682   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4683 
4684   for (proc=0, k=0; proc<size; proc++) {
4685     if (!len_s[proc]) continue;
4686     i    = owners[proc];
4687     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4688     k++;
4689   }
4690 
4691   /* receives and sends of j-structure are complete */
4692   /*------------------------------------------------*/
4693   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4694   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4695 
4696   /* send and recv i-structure */
4697   /*---------------------------*/
4698   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4699   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4700 
4701   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4702   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4703   for (proc=0,k=0; proc<size; proc++) {
4704     if (!len_s[proc]) continue;
4705     /* form outgoing message for i-structure:
4706          buf_si[0]:                 nrows to be sent
4707                [1:nrows]:           row index (global)
4708                [nrows+1:2*nrows+1]: i-structure index
4709     */
4710     /*-------------------------------------------*/
4711     nrows       = len_si[proc]/2 - 1;
4712     buf_si_i    = buf_si + nrows+1;
4713     buf_si[0]   = nrows;
4714     buf_si_i[0] = 0;
4715     nrows       = 0;
4716     for (i=owners[proc]; i<owners[proc+1]; i++) {
4717       anzi = ai[i+1] - ai[i];
4718       if (anzi) {
4719         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4720         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4721         nrows++;
4722       }
4723     }
4724     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4725     k++;
4726     buf_si += len_si[proc];
4727   }
4728 
4729   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4730   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4731 
4732   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4733   for (i=0; i<merge->nrecv; i++) {
4734     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4735   }
4736 
4737   ierr = PetscFree(len_si);CHKERRQ(ierr);
4738   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4739   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4740   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4741   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4742   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4743   ierr = PetscFree(status);CHKERRQ(ierr);
4744 
4745   /* compute a local seq matrix in each processor */
4746   /*----------------------------------------------*/
4747   /* allocate bi array and free space for accumulating nonzero column info */
4748   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4749   bi[0] = 0;
4750 
4751   /* create and initialize a linked list */
4752   nlnk = N+1;
4753   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4754 
4755   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4756   len  = ai[owners[rank+1]] - ai[owners[rank]];
4757   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4758 
4759   current_space = free_space;
4760 
4761   /* determine symbolic info for each local row */
4762   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4763 
4764   for (k=0; k<merge->nrecv; k++) {
4765     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4766     nrows       = *buf_ri_k[k];
4767     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4768     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4769   }
4770 
4771   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4772   len  = 0;
4773   for (i=0; i<m; i++) {
4774     bnzi = 0;
4775     /* add local non-zero cols of this proc's seqmat into lnk */
4776     arow  = owners[rank] + i;
4777     anzi  = ai[arow+1] - ai[arow];
4778     aj    = a->j + ai[arow];
4779     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4780     bnzi += nlnk;
4781     /* add received col data into lnk */
4782     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4783       if (i == *nextrow[k]) { /* i-th row */
4784         anzi  = *(nextai[k]+1) - *nextai[k];
4785         aj    = buf_rj[k] + *nextai[k];
4786         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787         bnzi += nlnk;
4788         nextrow[k]++; nextai[k]++;
4789       }
4790     }
4791     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4792 
4793     /* if free space is not available, make more free space */
4794     if (current_space->local_remaining<bnzi) {
4795       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4796       nspacedouble++;
4797     }
4798     /* copy data into free space, then initialize lnk */
4799     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4800     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4801 
4802     current_space->array           += bnzi;
4803     current_space->local_used      += bnzi;
4804     current_space->local_remaining -= bnzi;
4805 
4806     bi[i+1] = bi[i] + bnzi;
4807   }
4808 
4809   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4810 
4811   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4812   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4813   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4814 
4815   /* create symbolic parallel matrix B_mpi */
4816   /*---------------------------------------*/
4817   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4818   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4819   if (n==PETSC_DECIDE) {
4820     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4821   } else {
4822     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4823   }
4824   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4825   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4826   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4827   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4828   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4829 
4830   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4831   B_mpi->assembled    = PETSC_FALSE;
4832   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4833   merge->bi           = bi;
4834   merge->bj           = bj;
4835   merge->buf_ri       = buf_ri;
4836   merge->buf_rj       = buf_rj;
4837   merge->coi          = NULL;
4838   merge->coj          = NULL;
4839   merge->owners_co    = NULL;
4840 
4841   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4842 
4843   /* attach the supporting struct to B_mpi for reuse */
4844   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4845   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4846   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4847   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4848   *mpimat = B_mpi;
4849 
4850   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4851   PetscFunctionReturn(0);
4852 }
4853 
4854 #undef __FUNCT__
4855 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4856 /*@C
4857       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4858                  matrices from each processor
4859 
4860     Collective on MPI_Comm
4861 
4862    Input Parameters:
4863 +    comm - the communicator the parallel matrix will live on
4864 .    seqmat - the input sequential matrix on each process
4865 .    m - number of local rows (or PETSC_DECIDE)
4866 .    n - number of local columns (or PETSC_DECIDE)
4867 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4868 
4869    Output Parameter:
4870 .    mpimat - the parallel matrix generated
4871 
4872     Level: advanced
4873 
4874    Notes:
4875      The dimensions of the sequential matrix in each processor MUST be the same.
4876      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4877      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4878 @*/
4879 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4880 {
4881   PetscErrorCode ierr;
4882   PetscMPIInt    size;
4883 
4884   PetscFunctionBegin;
4885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4886   if (size == 1) {
4887     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4888     if (scall == MAT_INITIAL_MATRIX) {
4889       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4890     } else {
4891       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4892     }
4893     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4894     PetscFunctionReturn(0);
4895   }
4896   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4897   if (scall == MAT_INITIAL_MATRIX) {
4898     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4899   }
4900   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4901   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4902   PetscFunctionReturn(0);
4903 }
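
/*
   Example (a minimal sketch): merging the per-process sequential contributions,
   each with the same global dimensions, into one MPIAIJ matrix and reusing the
   symbolic data on later calls.  Cseq is a rank-local SeqAIJ matrix.

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Cseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... Cseq gets new numerical values, same nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Cseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/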
4904 
4905 #undef __FUNCT__
4906 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4907 /*@
4908      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4909           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4910           with MatGetSize()
4911 
4912     Not Collective
4913 
4914    Input Parameters:
4915 +    A - the matrix
4916 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4917 
4918    Output Parameter:
4919 .    A_loc - the local sequential matrix generated
4920 
4921     Level: developer
4922 
4923 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4924 
4925 @*/
4926 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4927 {
4928   PetscErrorCode ierr;
4929   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4930   Mat_SeqAIJ     *mat,*a,*b;
4931   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4932   MatScalar      *aa,*ba,*cam;
4933   PetscScalar    *ca;
4934   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4935   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4936   PetscBool      match;
4937   MPI_Comm       comm;
4938   PetscMPIInt    size;
4939 
4940   PetscFunctionBegin;
4941   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4942   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4943   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4944   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4945   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4946 
4947   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4948   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4949   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4950   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4951   aa = a->a; ba = b->a;
4952   if (scall == MAT_INITIAL_MATRIX) {
4953     if (size == 1) {
4954       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4955       PetscFunctionReturn(0);
4956     }
4957 
4958     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4959     ci[0] = 0;
4960     for (i=0; i<am; i++) {
4961       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4962     }
4963     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4964     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4965     k    = 0;
4966     for (i=0; i<am; i++) {
4967       ncols_o = bi[i+1] - bi[i];
4968       ncols_d = ai[i+1] - ai[i];
4969       /* off-diagonal portion of A */
4970       for (jo=0; jo<ncols_o; jo++) {
4971         col = cmap[*bj];
4972         if (col >= cstart) break;
4973         cj[k]   = col; bj++;
4974         ca[k++] = *ba++;
4975       }
4976       /* diagonal portion of A */
4977       for (j=0; j<ncols_d; j++) {
4978         cj[k]   = cstart + *aj++;
4979         ca[k++] = *aa++;
4980       }
4981       /* off-diagonal portion of A */
4982       for (j=jo; j<ncols_o; j++) {
4983         cj[k]   = cmap[*bj++];
4984         ca[k++] = *ba++;
4985       }
4986     }
4987     /* put together the new matrix */
4988     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4989     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4990     /* Since these are PETSc arrays, change flags to free them as necessary. */
4991     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4992     mat->free_a  = PETSC_TRUE;
4993     mat->free_ij = PETSC_TRUE;
4994     mat->nonew   = 0;
4995   } else if (scall == MAT_REUSE_MATRIX) {
4996     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4997     ci = mat->i; cj = mat->j; cam = mat->a;
4998     for (i=0; i<am; i++) {
4999       /* off-diagonal portion of A */
5000       ncols_o = bi[i+1] - bi[i];
5001       for (jo=0; jo<ncols_o; jo++) {
5002         col = cmap[*bj];
5003         if (col >= cstart) break;
5004         *cam++ = *ba++; bj++;
5005       }
5006       /* diagonal portion of A */
5007       ncols_d = ai[i+1] - ai[i];
5008       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5009       /* off-diagonal portion of A */
5010       for (j=jo; j<ncols_o; j++) {
5011         *cam++ = *ba++; bj++;
5012       }
5013     }
5014   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5015   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5016   PetscFunctionReturn(0);
5017 }
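
/*
   Example (a minimal sketch): gathering the locally owned rows of an MPIAIJ
   matrix A (over all N global columns) into a sequential matrix, and refreshing
   the values later without rebuilding the structure.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... the values of A change, the nonzero pattern stays the same ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/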
5018 
5019 #undef __FUNCT__
5020 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5021 /*@C
5022      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5023 
5024     Not Collective
5025 
5026    Input Parameters:
5027 +    A - the matrix
5028 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 -    row, col - index sets of rows and columns to extract (or NULL)
5030 
5031    Output Parameter:
5032 .    A_loc - the local sequential matrix generated
5033 
5034     Level: developer
5035 
5036 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5037 
5038 @*/
5039 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5040 {
5041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5042   PetscErrorCode ierr;
5043   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5044   IS             isrowa,iscola;
5045   Mat            *aloc;
5046   PetscBool      match;
5047 
5048   PetscFunctionBegin;
5049   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5050   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5051   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5052   if (!row) {
5053     start = A->rmap->rstart; end = A->rmap->rend;
5054     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5055   } else {
5056     isrowa = *row;
5057   }
5058   if (!col) {
5059     start = A->cmap->rstart;
5060     cmap  = a->garray;
5061     nzA   = a->A->cmap->n;
5062     nzB   = a->B->cmap->n;
5063     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5064     ncols = 0;
5065     for (i=0; i<nzB; i++) {
5066       if (cmap[i] < start) idx[ncols++] = cmap[i];
5067       else break;
5068     }
5069     imark = i;
5070     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5071     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5072     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5073   } else {
5074     iscola = *col;
5075   }
5076   if (scall != MAT_INITIAL_MATRIX) {
5077     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5078     aloc[0] = *A_loc;
5079   }
5080   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5081   *A_loc = aloc[0];
5082   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5083   if (!row) {
5084     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5085   }
5086   if (!col) {
5087     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5088   }
5089   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
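
/*
   Example (a minimal sketch): extracting the local rows of A restricted to the
   columns that carry nonzeros on this process.  Passing NULL for row and col
   lets the routine build the needed index sets itself.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
*/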
5092 
5093 #undef __FUNCT__
5094 #define __FUNCT__ "MatGetBrowsOfAcols"
5095 /*@C
5096     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5097 
5098     Collective on Mat
5099 
5100    Input Parameters:
5101 +    A,B - the matrices in mpiaij format
5102 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5103 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5104 
5105    Output Parameter:
5106 +    rowb, colb - index sets of rows and columns of B to extract
5107 -    B_seq - the sequential matrix generated
5108 
5109     Level: developer
5110 
5111 @*/
5112 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5113 {
5114   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5115   PetscErrorCode ierr;
5116   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5117   IS             isrowb,iscolb;
5118   Mat            *bseq=NULL;
5119 
5120   PetscFunctionBegin;
5121   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5122     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5123   }
5124   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5125 
5126   if (scall == MAT_INITIAL_MATRIX) {
5127     start = A->cmap->rstart;
5128     cmap  = a->garray;
5129     nzA   = a->A->cmap->n;
5130     nzB   = a->B->cmap->n;
5131     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5132     ncols = 0;
5133     for (i=0; i<nzB; i++) {  /* global rows of B (= off-diagonal cols of A) below this process's ownership range */
5134       if (cmap[i] < start) idx[ncols++] = cmap[i];
5135       else break;
5136     }
5137     imark = i;
5138     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5139     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above this process's ownership range */
5140     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5141     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5142   } else {
5143     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5144     isrowb  = *rowb; iscolb = *colb;
5145     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5146     bseq[0] = *B_seq;
5147   }
5148   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5149   *B_seq = bseq[0];
5150   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5151   if (!rowb) {
5152     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5153   } else {
5154     *rowb = isrowb;
5155   }
5156   if (!colb) {
5157     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5158   } else {
5159     *colb = iscolb;
5160   }
5161   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5162   PetscFunctionReturn(0);
5163 }
5164 
5165 #undef __FUNCT__
5166 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5167 /*
5168     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5169     of the OFF-DIAGONAL portion of local A
5170 
5171     Collective on Mat
5172 
5173    Input Parameters:
5174 +    A,B - the matrices in mpiaij format
5175 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5176 
5177    Output Parameters:
5178 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5179 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5180 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5181 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5182 
5183     Level: developer
5184 
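   Calling sketch (an illustration only; assumes A and B are assembled MPIAIJ matrices, and that the
   three buffer pointers are passed so the communication pattern can be reused on a later call;
   error checking abbreviated):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... after the values (but not the nonzero pattern) of B change ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
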
5185 */
5186 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5187 {
5188   VecScatter_MPI_General *gen_to,*gen_from;
5189   PetscErrorCode         ierr;
5190   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5191   Mat_SeqAIJ             *b_oth;
5192   VecScatter             ctx =a->Mvctx;
5193   MPI_Comm               comm;
5194   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5195   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5196   PetscScalar            *rvalues,*svalues;
5197   MatScalar              *b_otha,*bufa,*bufA;
5198   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5199   MPI_Request            *rwaits = NULL,*swaits = NULL;
5200   MPI_Status             *sstatus,rstatus;
5201   PetscMPIInt            jj,size;
5202   PetscInt               *cols,sbs,rbs;
5203   PetscScalar            *vals;
5204 
5205   PetscFunctionBegin;
5206   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5207   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5208   if (size == 1) PetscFunctionReturn(0);
5209 
5210   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5211     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5212   }
5213   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5214   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5215 
5216   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5217   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5218   rvalues  = gen_from->values; /* reused here to hold the lengths of the rows to be received */
5219   svalues  = gen_to->values;   /* reused here to hold the lengths of the rows to be sent */
5220   nrecvs   = gen_from->n;
5221   nsends   = gen_to->n;
5222 
5223   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5224   srow    = gen_to->indices;    /* local row index to be sent */
5225   sstarts = gen_to->starts;
5226   sprocs  = gen_to->procs;
5227   sstatus = gen_to->sstatus;
5228   sbs     = gen_to->bs;
5229   rstarts = gen_from->starts;
5230   rprocs  = gen_from->procs;
5231   rbs     = gen_from->bs;
5232 
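  /*
     Overall scheme: three message rounds over the Mvctx scatter pattern, namely
       1) i-array: exchange the number of nonzeros in each requested row of B,
       2) j-array: exchange the column indices of those rows,
       3) a-array: exchange the numerical values.
     With MAT_REUSE_MATRIX only step 3 is performed, using the index offsets saved in
     startsj_s/startsj_r and the send buffer saved in bufa_ptr from the initial call.
  */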
5233   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5234   if (scall == MAT_INITIAL_MATRIX) {
5235     /* i-array */
5236     /*---------*/
5237     /*  post receives */
5238     for (i=0; i<nrecvs; i++) {
5239       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5240       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5241       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5242     }
5243 
5244     /* pack the outgoing message */
5245     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5246 
5247     sstartsj[0] = 0;
5248     rstartsj[0] = 0;
5249     len         = 0; /* total length of j or a array to be sent */
5250     k           = 0;
5251     for (i=0; i<nsends; i++) {
5252       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5253       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5254       for (j=0; j<nrows; j++) {
5255         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5256         for (l=0; l<sbs; l++) {
5257           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5258 
5259           rowlen[j*sbs+l] = ncols;
5260 
5261           len += ncols;
5262           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5263         }
5264         k++;
5265       }
5266       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5267 
5268       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5269     }
5270     /* recvs and sends of i-array are completed */
5271     i = nrecvs;
5272     while (i--) {
5273       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5274     }
5275     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5276 
5277     /* allocate buffers for sending j and a arrays */
5278     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5279     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5280 
5281     /* create i-array of B_oth */
5282     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5283 
5284     b_othi[0] = 0;
5285     len       = 0; /* total length of j or a array to be received */
5286     k         = 0;
5287     for (i=0; i<nrecvs; i++) {
5288       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5289       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5290       for (j=0; j<nrows; j++) {
5291         b_othi[k+1] = b_othi[k] + rowlen[j];
5292         len        += rowlen[j]; k++;
5293       }
5294       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5295     }
5296 
5297     /* allocate space for j and a arrays of B_oth */
5298     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5299     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5300 
5301     /* j-array */
5302     /*---------*/
5303     /*  post receives of j-array */
5304     for (i=0; i<nrecvs; i++) {
5305       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5306       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5307     }
5308 
5309     /* pack the outgoing message j-array */
5310     k = 0;
5311     for (i=0; i<nsends; i++) {
5312       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5313       bufJ  = bufj+sstartsj[i];
5314       for (j=0; j<nrows; j++) {
5315         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5316         for (ll=0; ll<sbs; ll++) {
5317           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5318           for (l=0; l<ncols; l++) {
5319             *bufJ++ = cols[l];
5320           }
5321           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5322         }
5323       }
5324       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5325     }
5326 
5327     /* recvs and sends of j-array are completed */
5328     i = nrecvs;
5329     while (i--) {
5330       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5331     }
5332     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5333   } else if (scall == MAT_REUSE_MATRIX) {
5334     sstartsj = *startsj_s;
5335     rstartsj = *startsj_r;
5336     bufa     = *bufa_ptr;
5337     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5338     b_otha   = b_oth->a;
5339   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5340 
5341   /* a-array */
5342   /*---------*/
5343   /*  post receives of a-array */
5344   for (i=0; i<nrecvs; i++) {
5345     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5346     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5347   }
5348 
5349   /* pack the outgoing message a-array */
5350   k = 0;
5351   for (i=0; i<nsends; i++) {
5352     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5353     bufA  = bufa+sstartsj[i];
5354     for (j=0; j<nrows; j++) {
5355       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5356       for (ll=0; ll<sbs; ll++) {
5357         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5358         for (l=0; l<ncols; l++) {
5359           *bufA++ = vals[l];
5360         }
5361         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5362       }
5363     }
5364     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5365   }
5366   /* recvs and sends of a-array are completed */
5367   i = nrecvs;
5368   while (i--) {
5369     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5370   }
5371   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5372   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5373 
5374   if (scall == MAT_INITIAL_MATRIX) {
5375     /* put together the new matrix */
5376     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5377 
5378     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5379     /* Since these are PETSc arrays, change flags to free them as necessary. */
5380     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5381     b_oth->free_a  = PETSC_TRUE;
5382     b_oth->free_ij = PETSC_TRUE;
5383     b_oth->nonew   = 0;
5384 
5385     ierr = PetscFree(bufj);CHKERRQ(ierr);
5386     if (!startsj_s || !bufa_ptr) {
5387       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5388       ierr = PetscFree(bufa);CHKERRQ(ierr);
5389     } else {
5390       *startsj_s = sstartsj;
5391       *startsj_r = rstartsj;
5392       *bufa_ptr  = bufa;
5393     }
5394   }
5395   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5396   PetscFunctionReturn(0);
5397 }
5398 
5399 #undef __FUNCT__
5400 #define __FUNCT__ "MatGetCommunicationStructs"
5401 /*@C
5402   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5403 
5404   Not Collective
5405 
5406   Input Parameters:
5407 . A - The matrix in mpiaij format
5408 
5409   Output Parameter:
5410 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5411 . colmap - A map from global column index to local index into lvec
5412 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5413 
5414   Level: developer
5415 
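  Example usage (a minimal sketch; all three output pointers must be valid, the type of the colmap
  argument depends on whether PETSc was configured with ctable support, and error checking is
  abbreviated):
.vb
  Vec        lvec;
  VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
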
5416 @*/
5417 #if defined(PETSC_USE_CTABLE)
5418 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5419 #else
5420 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5421 #endif
5422 {
5423   Mat_MPIAIJ *a;
5424 
5425   PetscFunctionBegin;
5426   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5427   PetscValidPointer(lvec, 2);
5428   PetscValidPointer(colmap, 3);
5429   PetscValidPointer(multScatter, 4);
5430   a = (Mat_MPIAIJ*) A->data;
5431   if (lvec) *lvec = a->lvec;
5432   if (colmap) *colmap = a->colmap;
5433   if (multScatter) *multScatter = a->Mvctx;
5434   PetscFunctionReturn(0);
5435 }
5436 
5437 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5438 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5439 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5440 
5441 #undef __FUNCT__
5442 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5443     Computes C = (B'*A')' since computing A*B directly with a dense A and a sparse B is untenable
5444     Computes (B'*A')' since computing B*A directly is untenable
5445 
5446                n                       p                          p
5447         (              )       (              )         (                  )
5448       m (      A       )  *  n (       B      )   =   m (         C        )
5449         (              )       (              )         (                  )
5450 
5451 */
5452 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5453 {
5454   PetscErrorCode ierr;
5455   Mat            At,Bt,Ct;
5456 
5457   PetscFunctionBegin;
5458   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5459   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5460   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5461   ierr = MatDestroy(&At);CHKERRQ(ierr);
5462   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5463   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5464   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5465   PetscFunctionReturn(0);
5466 }
5467 
5468 #undef __FUNCT__
5469 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5470 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5471 {
5472   PetscErrorCode ierr;
5473   PetscInt       m=A->rmap->n,n=B->cmap->n;
5474   Mat            Cmat;
5475 
5476   PetscFunctionBegin;
5477   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5478   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5479   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5480   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5481   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5482   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5483   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5484   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5485 
5486   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5487 
5488   *C = Cmat;
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 /* ----------------------------------------------------------------*/
5493 #undef __FUNCT__
5494 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5495 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5496 {
5497   PetscErrorCode ierr;
5498 
5499   PetscFunctionBegin;
5500   if (scall == MAT_INITIAL_MATRIX) {
5501     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5502     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5503     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5504   }
5505   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5506   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5507   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 #if defined(PETSC_HAVE_MUMPS)
5512 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5513 #endif
5514 #if defined(PETSC_HAVE_PASTIX)
5515 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5516 #endif
5517 #if defined(PETSC_HAVE_SUPERLU_DIST)
5518 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5519 #endif
5520 #if defined(PETSC_HAVE_CLIQUE)
5521 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5522 #endif
5523 
5524 /*MC
5525    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5526 
5527    Options Database Keys:
5528 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5529 
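   Example (a minimal sketch of creating and preallocating an MPIAIJ matrix; comm, m, n and the
   preallocation counts are placeholders, and error checking is abbreviated):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
   /* followed by MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual */
.ve
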
5530   Level: beginner
5531 
5532 .seealso: MatCreateAIJ()
5533 M*/
5534 
5535 #undef __FUNCT__
5536 #define __FUNCT__ "MatCreate_MPIAIJ"
5537 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5538 {
5539   Mat_MPIAIJ     *b;
5540   PetscErrorCode ierr;
5541   PetscMPIInt    size;
5542 
5543   PetscFunctionBegin;
5544   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5545 
5546   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5547   B->data       = (void*)b;
5548   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5549   B->assembled  = PETSC_FALSE;
5550   B->insertmode = NOT_SET_VALUES;
5551   b->size       = size;
5552 
5553   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5554 
5555   /* build cache (stash) for off-processor entries set with MatSetValues() */
5556   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5557 
5558   b->donotstash  = PETSC_FALSE;
5559   b->colmap      = 0;
5560   b->garray      = 0;
5561   b->roworiented = PETSC_TRUE;
5562 
5563   /* stuff used for matrix vector multiply */
5564   b->lvec  = NULL;
5565   b->Mvctx = NULL;
5566 
5567   /* stuff for MatGetRow() */
5568   b->rowindices   = 0;
5569   b->rowvalues    = 0;
5570   b->getrowactive = PETSC_FALSE;
5571 
5572   /* flexible pointer used in CUSP/CUSPARSE classes */
5573   b->spptr = NULL;
5574 
5575 #if defined(PETSC_HAVE_MUMPS)
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5577 #endif
5578 #if defined(PETSC_HAVE_PASTIX)
5579   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5580 #endif
5581 #if defined(PETSC_HAVE_SUPERLU_DIST)
5582   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5583 #endif
5584 #if defined(PETSC_HAVE_CLIQUE)
5585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5586 #endif
5587   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5601   PetscFunctionReturn(0);
5602 }
5603 
5604 #undef __FUNCT__
5605 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5606 /*@C
5607      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5608          and "off-diagonal" part of the matrix in CSR format.
5609 
5610    Collective on MPI_Comm
5611 
5612    Input Parameters:
5613 +  comm - MPI communicator
5614 .  m - number of local rows (cannot be PETSC_DECIDE)
5615 .  n - number of local columns; this should be the same as the local size used in creating the
5616        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5617        calculated if N is given). For square matrices n is almost always m.
5618 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5619 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5620 .   i - row indices for "diagonal" portion of matrix
5621 .   j - column indices
5622 .   a - matrix values
5623 .   oi - row indices for "off-diagonal" portion of matrix
5624 .   oj - column indices
5625 -   oa - matrix values
5626 
5627    Output Parameter:
5628 .   mat - the matrix
5629 
5630    Level: advanced
5631 
5632    Notes:
5633        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5634        must free the arrays once the matrix has been destroyed and not before.
5635 
5636        The i and j indices are 0 based
5637 
5638        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5639 
5640        This sets local rows and cannot be used to set off-processor values.
5641 
5642        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5643        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5644        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5645        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5646        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5647        communication if it is known that only local entries will be set.
5648 
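   Example (a minimal sketch of the calling sequence; the CSR arrays i,j,a and oi,oj,oa are assumed
   to have been built by the caller for the "diagonal" and "off-diagonal" blocks and must outlive
   the matrix; error checking abbreviated):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                         i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   /* ... use A ... */
   ierr = MatDestroy(&A);CHKERRQ(ierr);
   /* the caller frees i,j,a,oi,oj,oa only after A has been destroyed */
.ve
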
5649 .keywords: matrix, aij, compressed row, sparse, parallel
5650 
5651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5652           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5653 @*/
5654 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5655 {
5656   PetscErrorCode ierr;
5657   Mat_MPIAIJ     *maij;
5658 
5659   PetscFunctionBegin;
5660   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5661   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5662   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5663   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5664   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5665   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5666   maij = (Mat_MPIAIJ*) (*mat)->data;
5667 
5668   (*mat)->preallocated = PETSC_TRUE;
5669 
5670   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5671   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5672 
5673   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5674   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5675 
5676   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5677   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5678   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5679   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5680 
5681   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5684   PetscFunctionReturn(0);
5685 }
5686 
5687 /*
5688     Special version for direct calls from Fortran
5689 */
5690 #include <petsc-private/fortranimpl.h>
5691 
5692 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5693 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5694 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5695 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5696 #endif
5697 
5698 /* Change these macros so they can be used in a void function */
5699 #undef CHKERRQ
5700 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5701 #undef SETERRQ2
5702 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5703 #undef SETERRQ3
5704 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5705 #undef SETERRQ
5706 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5707 
5708 #undef __FUNCT__
5709 #define __FUNCT__ "matsetvaluesmpiaij_"
5710 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5711 {
5712   Mat            mat  = *mmat;
5713   PetscInt       m    = *mm, n = *mn;
5714   InsertMode     addv = *maddv;
5715   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5716   PetscScalar    value;
5717   PetscErrorCode ierr;
5718 
5719   MatCheckPreallocated(mat,1);
5720   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5721 
5722 #if defined(PETSC_USE_DEBUG)
5723   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5724 #endif
5725   {
5726     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5727     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5728     PetscBool roworiented = aij->roworiented;
5729 
5730     /* Some Variables required in the macro */
5731     Mat        A                 = aij->A;
5732     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5733     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5734     MatScalar  *aa               = a->a;
5735     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5736     Mat        B                 = aij->B;
5737     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5738     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5739     MatScalar  *ba               = b->a;
5740 
5741     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5742     PetscInt  nonew = a->nonew;
5743     MatScalar *ap1,*ap2;
5744 
5745     PetscFunctionBegin;
5746     for (i=0; i<m; i++) {
5747       if (im[i] < 0) continue;
5748 #if defined(PETSC_USE_DEBUG)
5749       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5750 #endif
5751       if (im[i] >= rstart && im[i] < rend) {
5752         row      = im[i] - rstart;
5753         lastcol1 = -1;
5754         rp1      = aj + ai[row];
5755         ap1      = aa + ai[row];
5756         rmax1    = aimax[row];
5757         nrow1    = ailen[row];
5758         low1     = 0;
5759         high1    = nrow1;
5760         lastcol2 = -1;
5761         rp2      = bj + bi[row];
5762         ap2      = ba + bi[row];
5763         rmax2    = bimax[row];
5764         nrow2    = bilen[row];
5765         low2     = 0;
5766         high2    = nrow2;
5767 
5768         for (j=0; j<n; j++) {
5769           if (roworiented) value = v[i*n+j];
5770           else value = v[i+j*m];
5771           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5772           if (in[j] >= cstart && in[j] < cend) {
5773             col = in[j] - cstart;
5774             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5775           } else if (in[j] < 0) continue;
5776 #if defined(PETSC_USE_DEBUG)
5777           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5778 #endif
5779           else {
5780             if (mat->was_assembled) {
5781               if (!aij->colmap) {
5782                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5783               }
5784 #if defined(PETSC_USE_CTABLE)
5785               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5786               col--;
5787 #else
5788               col = aij->colmap[in[j]] - 1;
5789 #endif
5790               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5791                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5792                 col  =  in[j];
5793                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5794                 B     = aij->B;
5795                 b     = (Mat_SeqAIJ*)B->data;
5796                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5797                 rp2   = bj + bi[row];
5798                 ap2   = ba + bi[row];
5799                 rmax2 = bimax[row];
5800                 nrow2 = bilen[row];
5801                 low2  = 0;
5802                 high2 = nrow2;
5803                 bm    = aij->B->rmap->n;
5804                 ba    = b->a;
5805               }
5806             } else col = in[j];
5807             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5808           }
5809         }
5810       } else if (!aij->donotstash) {
5811         if (roworiented) {
5812           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5813         } else {
5814           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5815         }
5816       }
5817     }
5818   }
5819   PetscFunctionReturnVoid();
5820 }
5821 
5822