xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 59dfc837361002dee75839639cf0fa599c59bc3c)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine number diagonal and off-diagonal counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine number diagonal and off-diagonal counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array but is fast to acess.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       a_noinsert: ; \
406       ailen[row] = nrow1; \
407 }
408 
409 
410 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
411   { \
412     if (col <= lastcol2) low2 = 0;                        \
413     else high2 = nrow2;                                   \
414     lastcol2 = col;                                       \
415     while (high2-low2 > 5) {                              \
416       t = (low2+high2)/2;                                 \
417       if (rp2[t] > col) high2 = t;                        \
418       else             low2  = t;                         \
419     }                                                     \
420     for (_i=low2; _i<high2; _i++) {                       \
421       if (rp2[_i] > col) break;                           \
422       if (rp2[_i] == col) {                               \
423         if (addv == ADD_VALUES) ap2[_i] += value;         \
424         else                    ap2[_i] = value;          \
425         goto b_noinsert;                                  \
426       }                                                   \
427     }                                                     \
428     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
429     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
430     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
431     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
432     N = nrow2++ - 1; b->nz++; high2++;                    \
433     /* shift up all the later entries in this row */      \
434     for (ii=N; ii>=_i; ii--) {                            \
435       rp2[ii+1] = rp2[ii];                                \
436       ap2[ii+1] = ap2[ii];                                \
437     }                                                     \
438     rp2[_i] = col;                                        \
439     ap2[_i] = value;                                      \
440     b_noinsert: ;                                         \
441     bilen[row] = nrow2;                                   \
442   }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
446 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
447 {
448   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
449   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
450   PetscErrorCode ierr;
451   PetscInt       l,*garray = mat->garray,diag;
452 
453   PetscFunctionBegin;
454   /* code only works for square matrices A */
455 
456   /* find size of row to the left of the diagonal part */
457   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
458   row  = row - diag;
459   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
460     if (garray[b->j[b->i[row]+l]] > diag) break;
461   }
462   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
463 
464   /* diagonal part */
465   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
466 
467   /* right of diagonal part */
468   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatSetValues_MPIAIJ"
474 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
475 {
476   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
477   PetscScalar    value;
478   PetscErrorCode ierr;
479   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
480   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
481   PetscBool      roworiented = aij->roworiented;
482 
483   /* Some Variables required in the macro */
484   Mat        A                 = aij->A;
485   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
486   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
487   MatScalar  *aa               = a->a;
488   PetscBool  ignorezeroentries = a->ignorezeroentries;
489   Mat        B                 = aij->B;
490   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
491   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
492   MatScalar  *ba               = b->a;
493 
494   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
495   PetscInt  nonew;
496   MatScalar *ap1,*ap2;
497 
498   PetscFunctionBegin;
499   if (v) PetscValidScalarPointer(v,6);
500   for (i=0; i<m; i++) {
501     if (im[i] < 0) continue;
502 #if defined(PETSC_USE_DEBUG)
503     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
504 #endif
505     if (im[i] >= rstart && im[i] < rend) {
506       row      = im[i] - rstart;
507       lastcol1 = -1;
508       rp1      = aj + ai[row];
509       ap1      = aa + ai[row];
510       rmax1    = aimax[row];
511       nrow1    = ailen[row];
512       low1     = 0;
513       high1    = nrow1;
514       lastcol2 = -1;
515       rp2      = bj + bi[row];
516       ap2      = ba + bi[row];
517       rmax2    = bimax[row];
518       nrow2    = bilen[row];
519       low2     = 0;
520       high2    = nrow2;
521 
522       for (j=0; j<n; j++) {
523         if (v) {
524           if (roworiented) value = v[i*n+j];
525           else             value = v[i+j*m];
526         } else value = 0.0;
527         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
528         if (in[j] >= cstart && in[j] < cend) {
529           col   = in[j] - cstart;
530           nonew = a->nonew;
531           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
532         } else if (in[j] < 0) continue;
533 #if defined(PETSC_USE_DEBUG)
534         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
535 #endif
536         else {
537           if (mat->was_assembled) {
538             if (!aij->colmap) {
539               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
540             }
541 #if defined(PETSC_USE_CTABLE)
542             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
543             col--;
544 #else
545             col = aij->colmap[in[j]] - 1;
546 #endif
547             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
548               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
549               col  =  in[j];
550               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
551               B     = aij->B;
552               b     = (Mat_SeqAIJ*)B->data;
553               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
554               rp2   = bj + bi[row];
555               ap2   = ba + bi[row];
556               rmax2 = bimax[row];
557               nrow2 = bilen[row];
558               low2  = 0;
559               high2 = nrow2;
560               bm    = aij->B->rmap->n;
561               ba    = b->a;
562             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
563           } else col = in[j];
564           nonew = b->nonew;
565           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
566         }
567       }
568     } else {
569       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
570       if (!aij->donotstash) {
571         mat->assembled = PETSC_FALSE;
572         if (roworiented) {
573           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
574         } else {
575           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
576         }
577       }
578     }
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatGetValues_MPIAIJ"
585 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
586 {
587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
588   PetscErrorCode ierr;
589   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
590   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
591 
592   PetscFunctionBegin;
593   for (i=0; i<m; i++) {
594     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
595     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
596     if (idxm[i] >= rstart && idxm[i] < rend) {
597       row = idxm[i] - rstart;
598       for (j=0; j<n; j++) {
599         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
600         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
601         if (idxn[j] >= cstart && idxn[j] < cend) {
602           col  = idxn[j] - cstart;
603           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
604         } else {
605           if (!aij->colmap) {
606             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
607           }
608 #if defined(PETSC_USE_CTABLE)
609           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
610           col--;
611 #else
612           col = aij->colmap[idxn[j]] - 1;
613 #endif
614           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
615           else {
616             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
617           }
618         }
619       }
620     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
629 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       nstash,reallocs;
634   InsertMode     addv;
635 
636   PetscFunctionBegin;
637   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
638 
639   /* make sure all processors are either in INSERTMODE or ADDMODE */
640   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
641   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
642   mat->insertmode = addv; /* in case this processor had no cache */
643 
644   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
645   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
646   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
652 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
656   PetscErrorCode ierr;
657   PetscMPIInt    n;
658   PetscInt       i,j,rstart,ncols,flg;
659   PetscInt       *row,*col;
660   PetscBool      other_disassembled;
661   PetscScalar    *val;
662   InsertMode     addv = mat->insertmode;
663 
664   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
665 
666   PetscFunctionBegin;
667   if (!aij->donotstash && !mat->nooffprocentries) {
668     while (1) {
669       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
670       if (!flg) break;
671 
672       for (i=0; i<n; ) {
673         /* Now identify the consecutive vals belonging to the same row */
674         for (j=i,rstart=row[j]; j<n; j++) {
675           if (row[j] != rstart) break;
676         }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681 
682         i = j;
683       }
684     }
685     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
686   }
687   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
688   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
689 
690   /* determine if any processor has disassembled, if so we must
691      also disassemble ourselfs, in order that we may reassemble. */
692   /*
693      if nonzero structure of submatrix B cannot change then we know that
694      no processor disassembled thus we can skip this stuff
695   */
696   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
697     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
698     if (mat->was_assembled && !other_disassembled) {
699       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
700     }
701   }
702   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
703     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
704   }
705   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710 
711   aij->rowvalues = 0;
712 
713   /* used by MatAXPY() */
714   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
715   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
716 
717   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
718   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
724 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
725 {
726   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
727   PetscErrorCode ierr;
728 
729   PetscFunctionBegin;
730   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
731   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatZeroRows_MPIAIJ"
737 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
738 {
739   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
740   PetscInt      *owners = A->rmap->range;
741   PetscInt       n      = A->rmap->n;
742   PetscMPIInt    size   = mat->size;
743   PetscSF        sf;
744   PetscInt      *lrows;
745   PetscSFNode   *rrows;
746   PetscInt       lastidx = -1, r, p = 0, len = 0;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   /* Create SF where leaves are input rows and roots are owned rows */
751   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
752   for (r = 0; r < n; ++r) lrows[r] = -1;
753   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
754   for (r = 0; r < N; ++r) {
755     const PetscInt idx   = rows[r];
756     PetscBool      found = PETSC_FALSE;
757     /* Trick for efficient searching for sorted rows */
758     if (lastidx > idx) p = 0;
759     lastidx = idx;
760     for (; p < size; ++p) {
761       if (idx >= owners[p] && idx < owners[p+1]) {
762         rrows[r].rank  = p;
763         rrows[r].index = rows[r] - owners[p];
764         found = PETSC_TRUE;
765         break;
766       }
767     }
768     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
769   }
770   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
771   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
772   /* Collect flags for rows to be zeroed */
773   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
774   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
775   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
776   /* Compress and put in row numbers */
777   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
778   /* fix right hand side if needed */
779   if (x && b) {
780     const PetscScalar *xx;
781     PetscScalar       *bb;
782 
783     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
786     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
788   }
789   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
790   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
791   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
792     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
793   } else if (diag != 0.0) {
794     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
796     for (r = 0; r < len; ++r) {
797       const PetscInt row = lrows[r] + A->rmap->rstart;
798       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
799     }
800     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
801     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
802   } else {
803     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(lrows);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
811 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
812 {
813   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
814   PetscErrorCode    ierr;
815   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
816   PetscInt          i,j,r,m,p = 0,len = 0;
817   PetscInt          *lrows,*owners = A->rmap->range;
818   PetscSFNode       *rrows;
819   PetscSF           sf;
820   const PetscScalar *xx;
821   PetscScalar       *bb,*mask;
822   Vec               xmask,lmask;
823   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
824   const PetscInt    *aj, *ii,*ridx;
825   PetscScalar       *aa;
826 #if defined(PETSC_DEBUG)
827   PetscBool found = PETSC_FALSE;
828 #endif
829 
830   PetscFunctionBegin;
831   /* Create SF where leaves are input rows and roots are owned rows */
832   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
833   for (r = 0; r < n; ++r) lrows[r] = -1;
834   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
835   for (r = 0; r < N; ++r) {
836     const PetscInt idx   = rows[r];
837     PetscBool      found = PETSC_FALSE;
838     /* Trick for efficient searching for sorted rows */
839     if (lastidx > idx) p = 0;
840     lastidx = idx;
841     for (; p < size; ++p) {
842       if (idx >= owners[p] && idx < owners[p+1]) {
843         rrows[r].rank  = p;
844         rrows[r].index = rows[r] - owners[p];
845         found = PETSC_TRUE;
846         break;
847       }
848     }
849     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of off process part of matrix zeroing removed columns*/
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMult_MPIAIJ"
930 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
931 {
932   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       nt;
935 
936   PetscFunctionBegin;
937   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
938   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
939   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
948 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
949 {
950   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "MatMultAdd_MPIAIJ"
960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
961 {
962   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
968   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
975 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
976 {
977   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
978   PetscErrorCode ierr;
979   PetscBool      merged;
980 
981   PetscFunctionBegin;
982   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
983   /* do nondiagonal part */
984   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
985   if (!merged) {
986     /* send it on its way */
987     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
988     /* do local part */
989     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
990     /* receive remote parts: note this assumes the values are not actually */
991     /* added in yy until the next line, */
992     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993   } else {
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* send it on its way */
997     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
998     /* values actually were received in the Begin() but we need to call this nop */
999     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1006 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1007 {
1008   MPI_Comm       comm;
1009   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1010   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1011   IS             Me,Notme;
1012   PetscErrorCode ierr;
1013   PetscInt       M,N,first,last,*notme,i;
1014   PetscMPIInt    size;
1015 
1016   PetscFunctionBegin;
1017   /* Easy test: symmetric diagonal block */
1018   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1019   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1020   if (!*f) PetscFunctionReturn(0);
1021   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1023   if (size == 1) PetscFunctionReturn(0);
1024 
1025   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1026   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1027   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1028   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1029   for (i=0; i<first; i++) notme[i] = i;
1030   for (i=last; i<M; i++) notme[i-last+first] = i;
1031   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1032   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1033   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1034   Aoff = Aoffs[0];
1035   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1036   Boff = Boffs[0];
1037   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1039   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1041   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1042   ierr = PetscFree(notme);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1048 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1049 {
1050   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   /* do nondiagonal part */
1055   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1056   /* send it on its way */
1057   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1058   /* do local part */
1059   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1060   /* receive remote parts */
1061   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 /*
1066   This only works correctly for square matrices where the subblock A->A is the
1067    diagonal block
1068 */
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatScale_MPIAIJ"
1085 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1086 {
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1092   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatDestroy_MPIAIJ"
1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1099 {
1100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1109   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1111 #if defined(PETSC_USE_CTABLE)
1112   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1113 #else
1114   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1118   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1119   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1120   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1121   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1122 
1123   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1138 {
1139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1140   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1141   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1142   PetscErrorCode ierr;
1143   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1144   int            fd;
1145   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1146   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1147   PetscScalar    *column_values;
1148   PetscInt       message_count,flowcontrolcount;
1149   FILE           *file;
1150 
1151   PetscFunctionBegin;
1152   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1153   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1154   nz   = A->nz + B->nz;
1155   if (!rank) {
1156     header[0] = MAT_FILE_CLASSID;
1157     header[1] = mat->rmap->N;
1158     header[2] = mat->cmap->N;
1159 
1160     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1162     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1163     /* get largest number of rows any processor has */
1164     rlen  = mat->rmap->n;
1165     range = mat->rmap->range;
1166     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1167   } else {
1168     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1169     rlen = mat->rmap->n;
1170   }
1171 
1172   /* load up the local row counts */
1173   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1174   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     for (i=1; i<size; i++) {
1181       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1182       rlen = range[i+1] - range[i];
1183       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     }
1186     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1187   } else {
1188     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1189     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1191   }
1192   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1193 
1194   /* load up the local column indices */
1195   nzmax = nz; /* th processor needs space a largest processor needs */
1196   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1198   cnt   = 0;
1199   for (i=0; i<mat->rmap->n; i++) {
1200     for (j=B->i[i]; j<B->i[i+1]; j++) {
1201       if ((col = garray[B->j[j]]) > cstart) break;
1202       column_indices[cnt++] = col;
1203     }
1204     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1205     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1206   }
1207   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1208 
1209   /* store the column indices to the file */
1210   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1211   if (!rank) {
1212     MPI_Status status;
1213     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     for (i=1; i<size; i++) {
1215       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1216       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1217       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1218       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1229 
1230   /* load up the local column values */
1231   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1232   cnt  = 0;
1233   for (i=0; i<mat->rmap->n; i++) {
1234     for (j=B->i[i]; j<B->i[i+1]; j++) {
1235       if (garray[B->j[j]] > cstart) break;
1236       column_values[cnt++] = B->a[j];
1237     }
1238     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1239     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1240   }
1241   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1242 
1243   /* store the column values to the file */
1244   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1245   if (!rank) {
1246     MPI_Status status;
1247     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1248     for (i=1; i<size; i++) {
1249       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1250       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1251       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1252       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     }
1255     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1256   } else {
1257     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1258     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(column_values);CHKERRQ(ierr);
1263 
1264   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1265   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include <petscdraw.h>
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscBool inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1336   }
1337 
1338   if (size == 1) {
1339     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1340     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1341   } else {
1342     /* assemble the entire matrix onto first processor. */
1343     Mat        A;
1344     Mat_SeqAIJ *Aloc;
1345     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1346     MatScalar  *a;
1347 
1348     if (mat->rmap->N > 1024) {
1349       PetscBool flg = PETSC_FALSE;
1350 
1351       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1352       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1353     }
1354 
1355     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1356     if (!rank) {
1357       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1358     } else {
1359       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1360     }
1361     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1362     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1363     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1364     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1365     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1366 
1367     /* copy over the A part */
1368     Aloc = (Mat_SeqAIJ*)aij->A->data;
1369     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1370     row  = mat->rmap->rstart;
1371     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1372     for (i=0; i<m; i++) {
1373       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1374       row++;
1375       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1376     }
1377     aj = Aloc->j;
1378     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1379 
1380     /* copy over the B part */
1381     Aloc = (Mat_SeqAIJ*)aij->B->data;
1382     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1383     row  = mat->rmap->rstart;
1384     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1385     ct   = cols;
1386     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1387     for (i=0; i<m; i++) {
1388       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1389       row++;
1390       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1391     }
1392     ierr = PetscFree(ct);CHKERRQ(ierr);
1393     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395     /*
1396        Everyone has to call to draw the matrix since the graphics waits are
1397        synchronized across all processors that share the PetscDraw object
1398     */
1399     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1400     if (!rank) {
1401       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1402       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1403       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1404       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1405     }
1406     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1407     ierr = MatDestroy(&A);CHKERRQ(ierr);
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatView_MPIAIJ"
1414 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1415 {
1416   PetscErrorCode ierr;
1417   PetscBool      iascii,isdraw,issocket,isbinary;
1418 
1419   PetscFunctionBegin;
1420   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1421   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1422   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1424   if (iascii || isdraw || isbinary || issocket) {
1425     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatSOR_MPIAIJ"
1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1433 {
1434   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1435   PetscErrorCode ierr;
1436   Vec            bb1 = 0;
1437   PetscBool      hasop;
1438 
1439   PetscFunctionBegin;
1440   if (flag == SOR_APPLY_UPPER) {
1441     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1442     PetscFunctionReturn(0);
1443   }
1444 
1445   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1446     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1447   }
1448 
1449   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452       its--;
1453     }
1454 
1455     while (its--) {
1456       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1461       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1462 
1463       /* local sweep */
1464       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1465     }
1466   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1469       its--;
1470     }
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_EISENSTAT) {
1499     Vec xx1;
1500 
1501     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1502     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1503 
1504     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506     if (!mat->diag) {
1507       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1508       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1509     }
1510     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1511     if (hasop) {
1512       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1513     } else {
1514       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1515     }
1516     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1517 
1518     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1519 
1520     /* local sweep */
1521     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1522     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1523     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1524   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1525 
1526   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatPermute_MPIAIJ"
1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1533 {
1534   Mat            aA,aB,Aperm;
1535   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1536   PetscScalar    *aa,*ba;
1537   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1538   PetscSF        rowsf,sf;
1539   IS             parcolp = NULL;
1540   PetscBool      done;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1545   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1546   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1547   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1548 
1549   /* Invert row permutation to find out where my rows should go */
1550   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1551   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1552   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1553   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1554   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1555   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1556 
1557   /* Invert column permutation to find out where my columns should go */
1558   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1559   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1560   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1561   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1562   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1563   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1567   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1568   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1569 
1570   /* Find out where my gcols should go */
1571   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1572   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1573   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1574   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1575   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1579 
1580   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1581   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1582   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt row = rdest[i],rowner;
1585     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1586     for (j=ai[i]; j<ai[i+1]; j++) {
1587       PetscInt cowner,col = cdest[aj[j]];
1588       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592     for (j=bi[i]; j<bi[i+1]; j++) {
1593       PetscInt cowner,col = gcdest[bj[j]];
1594       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1595       if (rowner == cowner) dnnz[i]++;
1596       else onnz[i]++;
1597     }
1598   }
1599   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1600   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1601   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1602   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1603   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1604 
1605   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1606   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1607   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1608   for (i=0; i<m; i++) {
1609     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1610     PetscInt j0,rowlen;
1611     rowlen = ai[i+1] - ai[i];
1612     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1613       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1614       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1615     }
1616     rowlen = bi[i+1] - bi[i];
1617     for (j0=j=0; j<rowlen; j0=j) {
1618       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1619       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1620     }
1621   }
1622   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1625   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1626   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1627   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1628   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1629   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1630   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1631   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1632   *B = Aperm;
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1638 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1639 {
1640   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1641   Mat            A    = mat->A,B = mat->B;
1642   PetscErrorCode ierr;
1643   PetscReal      isend[5],irecv[5];
1644 
1645   PetscFunctionBegin;
1646   info->block_size = 1.0;
1647   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1650   isend[3] = info->memory;  isend[4] = info->mallocs;
1651 
1652   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1653 
1654   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1655   isend[3] += info->memory;  isend[4] += info->mallocs;
1656   if (flag == MAT_LOCAL) {
1657     info->nz_used      = isend[0];
1658     info->nz_allocated = isend[1];
1659     info->nz_unneeded  = isend[2];
1660     info->memory       = isend[3];
1661     info->mallocs      = isend[4];
1662   } else if (flag == MAT_GLOBAL_MAX) {
1663     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1664 
1665     info->nz_used      = irecv[0];
1666     info->nz_allocated = irecv[1];
1667     info->nz_unneeded  = irecv[2];
1668     info->memory       = irecv[3];
1669     info->mallocs      = irecv[4];
1670   } else if (flag == MAT_GLOBAL_SUM) {
1671     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1672 
1673     info->nz_used      = irecv[0];
1674     info->nz_allocated = irecv[1];
1675     info->nz_unneeded  = irecv[2];
1676     info->memory       = irecv[3];
1677     info->mallocs      = irecv[4];
1678   }
1679   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1680   info->fill_ratio_needed = 0;
1681   info->factor_mallocs    = 0;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSetOption_MPIAIJ"
1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1688 {
1689   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   switch (op) {
1694   case MAT_NEW_NONZERO_LOCATIONS:
1695   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1696   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1697   case MAT_KEEP_NONZERO_PATTERN:
1698   case MAT_NEW_NONZERO_LOCATION_ERR:
1699   case MAT_USE_INODES:
1700   case MAT_IGNORE_ZERO_ENTRIES:
1701     MatCheckPreallocated(A,1);
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_ROW_ORIENTED:
1706     a->roworiented = flg;
1707 
1708     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1709     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1710     break;
1711   case MAT_NEW_DIAGONALS:
1712     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1713     break;
1714   case MAT_IGNORE_OFF_PROC_ENTRIES:
1715     a->donotstash = flg;
1716     break;
1717   case MAT_SPD:
1718     A->spd_set = PETSC_TRUE;
1719     A->spd     = flg;
1720     if (flg) {
1721       A->symmetric                  = PETSC_TRUE;
1722       A->structurally_symmetric     = PETSC_TRUE;
1723       A->symmetric_set              = PETSC_TRUE;
1724       A->structurally_symmetric_set = PETSC_TRUE;
1725     }
1726     break;
1727   case MAT_SYMMETRIC:
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     break;
1730   case MAT_STRUCTURALLY_SYMMETRIC:
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_HERMITIAN:
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_SYMMETRY_ETERNAL:
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     break;
1739   default:
1740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1741   }
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatGetRow_MPIAIJ"
1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1751   PetscErrorCode ierr;
1752   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1753   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1754   PetscInt       *cmap,*idx_p;
1755 
1756   PetscFunctionBegin;
1757   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1758   mat->getrowactive = PETSC_TRUE;
1759 
1760   if (!mat->rowvalues && (idx || v)) {
1761     /*
1762         allocate enough space to hold information from the longest row.
1763     */
1764     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1765     PetscInt   max = 1,tmp;
1766     for (i=0; i<matin->rmap->n; i++) {
1767       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1768       if (max < tmp) max = tmp;
1769     }
1770     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1771   }
1772 
1773   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1774   lrow = row - rstart;
1775 
1776   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1777   if (!v)   {pvA = 0; pvB = 0;}
1778   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1779   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1780   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1781   nztot = nzA + nzB;
1782 
1783   cmap = mat->garray;
1784   if (v  || idx) {
1785     if (nztot) {
1786       /* Sort by increasing column numbers, assuming A and B already sorted */
1787       PetscInt imark = -1;
1788       if (v) {
1789         *v = v_p = mat->rowvalues;
1790         for (i=0; i<nzB; i++) {
1791           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1792           else break;
1793         }
1794         imark = i;
1795         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1796         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1797       }
1798       if (idx) {
1799         *idx = idx_p = mat->rowindices;
1800         if (imark > -1) {
1801           for (i=0; i<imark; i++) {
1802             idx_p[i] = cmap[cworkB[i]];
1803           }
1804         } else {
1805           for (i=0; i<nzB; i++) {
1806             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1807             else break;
1808           }
1809           imark = i;
1810         }
1811         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1812         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1813       }
1814     } else {
1815       if (idx) *idx = 0;
1816       if (v)   *v   = 0;
1817     }
1818   }
1819   *nz  = nztot;
1820   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1821   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatNorm_MPIAIJ"
1839 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1840 {
1841   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1842   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1843   PetscErrorCode ierr;
1844   PetscInt       i,j,cstart = mat->cmap->rstart;
1845   PetscReal      sum = 0.0;
1846   MatScalar      *v;
1847 
1848   PetscFunctionBegin;
1849   if (aij->size == 1) {
1850     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1851   } else {
1852     if (type == NORM_FROBENIUS) {
1853       v = amat->a;
1854       for (i=0; i<amat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       v = bmat->a;
1858       for (i=0; i<bmat->nz; i++) {
1859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1860       }
1861       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1862       *norm = PetscSqrtReal(*norm);
1863     } else if (type == NORM_1) { /* max column norm */
1864       PetscReal *tmp,*tmp2;
1865       PetscInt  *jj,*garray = aij->garray;
1866       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1867       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1868       *norm = 0.0;
1869       v     = amat->a; jj = amat->j;
1870       for (j=0; j<amat->nz; j++) {
1871         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1872       }
1873       v = bmat->a; jj = bmat->j;
1874       for (j=0; j<bmat->nz; j++) {
1875         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1876       }
1877       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1878       for (j=0; j<mat->cmap->N; j++) {
1879         if (tmp2[j] > *norm) *norm = tmp2[j];
1880       }
1881       ierr = PetscFree(tmp);CHKERRQ(ierr);
1882       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1883     } else if (type == NORM_INFINITY) { /* max row norm */
1884       PetscReal ntemp = 0.0;
1885       for (j=0; j<aij->A->rmap->n; j++) {
1886         v   = amat->a + amat->i[j];
1887         sum = 0.0;
1888         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         v = bmat->a + bmat->i[j];
1892         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         if (sum > ntemp) ntemp = sum;
1896       }
1897       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatTranspose_MPIAIJ"
1905 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1906 {
1907   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1908   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1909   PetscErrorCode ierr;
1910   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1911   PetscInt       cstart = A->cmap->rstart,ncol;
1912   Mat            B;
1913   MatScalar      *array;
1914 
1915   PetscFunctionBegin;
1916   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1917 
1918   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1919   ai = Aloc->i; aj = Aloc->j;
1920   bi = Bloc->i; bj = Bloc->j;
1921   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1922     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1923     PetscSFNode          *oloc;
1924     PETSC_UNUSED PetscSF sf;
1925 
1926     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1927     /* compute d_nnz for preallocation */
1928     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1929     for (i=0; i<ai[ma]; i++) {
1930       d_nnz[aj[i]]++;
1931       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1932     }
1933     /* compute local off-diagonal contributions */
1934     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1935     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1936     /* map those to global */
1937     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1938     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1939     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1940     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1941     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1943     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1944 
1945     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1946     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1947     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1948     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1949     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1950     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1951   } else {
1952     B    = *matout;
1953     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1954     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1955   }
1956 
1957   /* copy over the A part */
1958   array = Aloc->a;
1959   row   = A->rmap->rstart;
1960   for (i=0; i<ma; i++) {
1961     ncol = ai[i+1]-ai[i];
1962     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1963     row++;
1964     array += ncol; aj += ncol;
1965   }
1966   aj = Aloc->j;
1967   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1968 
1969   /* copy over the B part */
1970   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1971   array = Bloc->a;
1972   row   = A->rmap->rstart;
1973   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1974   cols_tmp = cols;
1975   for (i=0; i<mb; i++) {
1976     ncol = bi[i+1]-bi[i];
1977     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1978     row++;
1979     array += ncol; cols_tmp += ncol;
1980   }
1981   ierr = PetscFree(cols);CHKERRQ(ierr);
1982 
1983   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1985   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1986     *matout = B;
1987   } else {
1988     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1989   }
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1995 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1996 {
1997   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1998   Mat            a    = aij->A,b = aij->B;
1999   PetscErrorCode ierr;
2000   PetscInt       s1,s2,s3;
2001 
2002   PetscFunctionBegin;
2003   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2004   if (rr) {
2005     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2006     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2007     /* Overlap communication with computation. */
2008     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2009   }
2010   if (ll) {
2011     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2012     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2013     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2014   }
2015   /* scale  the diagonal block */
2016   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2017 
2018   if (rr) {
2019     /* Do a scatter end and then right scale the off-diagonal block */
2020     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2021     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2028 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2029 {
2030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatEqual_MPIAIJ"
2040 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2041 {
2042   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2043   Mat            a,b,c,d;
2044   PetscBool      flg;
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   a = matA->A; b = matA->B;
2049   c = matB->A; d = matB->B;
2050 
2051   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2052   if (flg) {
2053     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2054   }
2055   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatCopy_MPIAIJ"
2061 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2062 {
2063   PetscErrorCode ierr;
2064   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2065   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2066 
2067   PetscFunctionBegin;
2068   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2069   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2070     /* because of the column compression in the off-processor part of the matrix a->B,
2071        the number of columns in a->B and b->B may be different, hence we cannot call
2072        the MatCopy() directly on the two parts. If need be, we can provide a more
2073        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2074        then copying the submatrices */
2075     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2076   } else {
2077     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2078     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatSetUp_MPIAIJ"
2085 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2086 {
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2096 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2097 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2098 {
2099   PetscInt       i,m=Y->rmap->N;
2100   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2101   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2102   const PetscInt *xi = x->i,*yi = y->i;
2103 
2104   PetscFunctionBegin;
2105   /* Set the number of nonzeros in the new matrix */
2106   for (i=0; i<m; i++) {
2107     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2108     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2109     nnz[i] = 0;
2110     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2111       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2112       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2113       nnz[i]++;
2114     }
2115     for (; k<nzy; k++) nnz[i]++;
2116   }
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatAXPY_MPIAIJ"
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   PetscInt       i;
2126   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2127   PetscBLASInt   bnz,one=1;
2128   Mat_SeqAIJ     *x,*y;
2129 
2130   PetscFunctionBegin;
2131   if (str == SAME_NONZERO_PATTERN) {
2132     PetscScalar alpha = a;
2133     x    = (Mat_SeqAIJ*)xx->A->data;
2134     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2135     y    = (Mat_SeqAIJ*)yy->A->data;
2136     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2137     x    = (Mat_SeqAIJ*)xx->B->data;
2138     y    = (Mat_SeqAIJ*)yy->B->data;
2139     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2140     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2141   } else if (str == SUBSET_NONZERO_PATTERN) {
2142     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2143 
2144     x = (Mat_SeqAIJ*)xx->B->data;
2145     y = (Mat_SeqAIJ*)yy->B->data;
2146     if (y->xtoy && y->XtoY != xx->B) {
2147       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2148       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2149     }
2150     if (!y->xtoy) { /* get xtoy */
2151       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2152       y->XtoY = xx->B;
2153       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2154     }
2155     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2156   } else {
2157     Mat      B;
2158     PetscInt *nnz_d,*nnz_o;
2159     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2160     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2161     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2162     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2163     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2164     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2165     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2166     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2167     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2168     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2169     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2170     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2171     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2172     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatConjugate_MPIAIJ"
2181 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2182 {
2183 #if defined(PETSC_USE_COMPLEX)
2184   PetscErrorCode ierr;
2185   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2186 
2187   PetscFunctionBegin;
2188   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2189   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2190 #else
2191   PetscFunctionBegin;
2192 #endif
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatRealPart_MPIAIJ"
2198 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2199 {
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2205   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2211 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2212 {
2213   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2218   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 #if defined(PETSC_HAVE_PBGL)
2223 
2224 #include <boost/parallel/mpi/bsp_process_group.hpp>
2225 #include <boost/graph/distributed/ilu_default_graph.hpp>
2226 #include <boost/graph/distributed/ilu_0_block.hpp>
2227 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2228 #include <boost/graph/distributed/petsc/interface.hpp>
2229 #include <boost/multi_array.hpp>
2230 #include <boost/parallel/distributed_property_map->hpp>
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2234 /*
2235   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2236 */
2237 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2238 {
2239   namespace petsc = boost::distributed::petsc;
2240 
2241   namespace graph_dist = boost::graph::distributed;
2242   using boost::graph::distributed::ilu_default::process_group_type;
2243   using boost::graph::ilu_permuted;
2244 
2245   PetscBool      row_identity, col_identity;
2246   PetscContainer c;
2247   PetscInt       m, n, M, N;
2248   PetscErrorCode ierr;
2249 
2250   PetscFunctionBegin;
2251   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2252   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2253   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2254   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2255 
2256   process_group_type pg;
2257   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2258   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2259   lgraph_type& level_graph = *lgraph_p;
2260   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2261 
2262   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2263   ilu_permuted(level_graph);
2264 
2265   /* put together the new matrix */
2266   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2267   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2268   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2269   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2270   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2271   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2272   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274 
2275   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2276   ierr = PetscContainerSetPointer(c, lgraph_p);
2277   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2278   ierr = PetscContainerDestroy(&c);
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2284 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2285 {
2286   PetscFunctionBegin;
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatSolve_MPIAIJ"
2292 /*
2293   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2294 */
2295 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2296 {
2297   namespace graph_dist = boost::graph::distributed;
2298 
2299   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2300   lgraph_type    *lgraph_p;
2301   PetscContainer c;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2306   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2307   ierr = VecCopy(b, x);CHKERRQ(ierr);
2308 
2309   PetscScalar *array_x;
2310   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2311   PetscInt sx;
2312   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2313 
2314   PetscScalar *array_b;
2315   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2316   PetscInt sb;
2317   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2318 
2319   lgraph_type& level_graph = *lgraph_p;
2320   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2321 
2322   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2323   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2324   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2325 
2326   typedef boost::iterator_property_map<array_ref_type::iterator,
2327                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2328   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2329   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2330 
2331   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2332   PetscFunctionReturn(0);
2333 }
2334 #endif
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatDestroy_MatRedundant"
2338 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2339 {
2340   PetscErrorCode ierr;
2341   Mat_Redundant  *redund;
2342   PetscInt       i;
2343   PetscMPIInt    size;
2344 
2345   PetscFunctionBegin;
2346   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2347   if (size == 1) {
2348     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2349     redund = a->redundant;
2350   } else {
2351     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2352     redund = a->redundant;
2353   }
2354   if (redund){
2355     if (redund->matseq) { /* via MatGetSubMatrices()  */
2356       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2357       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2358       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2359       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2360     } else {
2361       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2362       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2363       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2364       for (i=0; i<redund->nrecvs; i++) {
2365         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2366         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2367       }
2368       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2369     }
2370 
2371     if (redund->psubcomm) {
2372       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2373     }
2374     ierr = redund->Destroy(A);CHKERRQ(ierr);
2375     ierr = PetscFree(redund);CHKERRQ(ierr);
2376   }
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2382 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2383 {
2384   PetscMPIInt    rank,size;
2385   MPI_Comm       comm;
2386   PetscErrorCode ierr;
2387   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2388   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2389   PetscInt       *rowrange = mat->rmap->range;
2390   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2391   Mat            A = aij->A,B=aij->B,C=*matredundant;
2392   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2393   PetscScalar    *sbuf_a;
2394   PetscInt       nzlocal=a->nz+b->nz;
2395   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2396   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2397   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2398   MatScalar      *aworkA,*aworkB;
2399   PetscScalar    *vals;
2400   PetscMPIInt    tag1,tag2,tag3,imdex;
2401   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2402   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2403   MPI_Status     recv_status,*send_status;
2404   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2405   PetscInt       **rbuf_j=NULL;
2406   PetscScalar    **rbuf_a=NULL;
2407   Mat_Redundant  *redund =NULL;
2408 
2409   PetscFunctionBegin;
2410   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2411   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2413   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2414   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2415 
2416   if (reuse == MAT_REUSE_MATRIX) {
2417     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2418     if (subsize == 1) {
2419       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2420       redund = c->redundant;
2421     } else {
2422       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2423       redund = c->redundant;
2424     }
2425     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2426 
2427     nsends    = redund->nsends;
2428     nrecvs    = redund->nrecvs;
2429     send_rank = redund->send_rank;
2430     recv_rank = redund->recv_rank;
2431     sbuf_nz   = redund->sbuf_nz;
2432     rbuf_nz   = redund->rbuf_nz;
2433     sbuf_j    = redund->sbuf_j;
2434     sbuf_a    = redund->sbuf_a;
2435     rbuf_j    = redund->rbuf_j;
2436     rbuf_a    = redund->rbuf_a;
2437   }
2438 
2439   if (reuse == MAT_INITIAL_MATRIX) {
2440     PetscInt    nleftover,np_subcomm;
2441 
2442     /* get the destination processors' id send_rank, nsends and nrecvs */
2443     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2444 
2445     np_subcomm = size/nsubcomm;
2446     nleftover  = size - nsubcomm*np_subcomm;
2447 
2448     /* block of codes below is specific for INTERLACED */
2449     /* ------------------------------------------------*/
2450     nsends = 0; nrecvs = 0;
2451     for (i=0; i<size; i++) {
2452       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2453         send_rank[nsends++] = i;
2454         recv_rank[nrecvs++] = i;
2455       }
2456     }
2457     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2458       i = size-nleftover-1;
2459       j = 0;
2460       while (j < nsubcomm - nleftover) {
2461         send_rank[nsends++] = i;
2462         i--; j++;
2463       }
2464     }
2465 
2466     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2467       for (i=0; i<nleftover; i++) {
2468         recv_rank[nrecvs++] = size-nleftover+i;
2469       }
2470     }
2471     /*----------------------------------------------*/
2472 
2473     /* allocate sbuf_j, sbuf_a */
2474     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2475     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2476     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2477     /*
2478     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2479     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2480      */
2481   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2482 
2483   /* copy mat's local entries into the buffers */
2484   if (reuse == MAT_INITIAL_MATRIX) {
2485     rownz_max = 0;
2486     rptr      = sbuf_j;
2487     cols      = sbuf_j + rend-rstart + 1;
2488     vals      = sbuf_a;
2489     rptr[0]   = 0;
2490     for (i=0; i<rend-rstart; i++) {
2491       row    = i + rstart;
2492       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2493       ncols  = nzA + nzB;
2494       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2495       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2496       /* load the column indices for this row into cols */
2497       lwrite = 0;
2498       for (l=0; l<nzB; l++) {
2499         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2500           vals[lwrite]   = aworkB[l];
2501           cols[lwrite++] = ctmp;
2502         }
2503       }
2504       for (l=0; l<nzA; l++) {
2505         vals[lwrite]   = aworkA[l];
2506         cols[lwrite++] = cstart + cworkA[l];
2507       }
2508       for (l=0; l<nzB; l++) {
2509         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2510           vals[lwrite]   = aworkB[l];
2511           cols[lwrite++] = ctmp;
2512         }
2513       }
2514       vals     += ncols;
2515       cols     += ncols;
2516       rptr[i+1] = rptr[i] + ncols;
2517       if (rownz_max < ncols) rownz_max = ncols;
2518     }
2519     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2520   } else { /* only copy matrix values into sbuf_a */
2521     rptr    = sbuf_j;
2522     vals    = sbuf_a;
2523     rptr[0] = 0;
2524     for (i=0; i<rend-rstart; i++) {
2525       row    = i + rstart;
2526       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2527       ncols  = nzA + nzB;
2528       cworkB = b->j + b->i[i];
2529       aworkA = a->a + a->i[i];
2530       aworkB = b->a + b->i[i];
2531       lwrite = 0;
2532       for (l=0; l<nzB; l++) {
2533         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2534       }
2535       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2536       for (l=0; l<nzB; l++) {
2537         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2538       }
2539       vals     += ncols;
2540       rptr[i+1] = rptr[i] + ncols;
2541     }
2542   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2543 
2544   /* send nzlocal to others, and recv other's nzlocal */
2545   /*--------------------------------------------------*/
2546   if (reuse == MAT_INITIAL_MATRIX) {
2547     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2548 
2549     s_waits2 = s_waits3 + nsends;
2550     s_waits1 = s_waits2 + nsends;
2551     r_waits1 = s_waits1 + nsends;
2552     r_waits2 = r_waits1 + nrecvs;
2553     r_waits3 = r_waits2 + nrecvs;
2554   } else {
2555     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2556 
2557     r_waits3 = s_waits3 + nsends;
2558   }
2559 
2560   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2561   if (reuse == MAT_INITIAL_MATRIX) {
2562     /* get new tags to keep the communication clean */
2563     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2564     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2565     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2566 
2567     /* post receives of other's nzlocal */
2568     for (i=0; i<nrecvs; i++) {
2569       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2570     }
2571     /* send nzlocal to others */
2572     for (i=0; i<nsends; i++) {
2573       sbuf_nz[i] = nzlocal;
2574       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2575     }
2576     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2577     count = nrecvs;
2578     while (count) {
2579       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2580 
2581       recv_rank[imdex] = recv_status.MPI_SOURCE;
2582       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2583       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2584 
2585       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2586 
2587       rbuf_nz[imdex] += i + 2;
2588 
2589       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2590       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2591       count--;
2592     }
2593     /* wait on sends of nzlocal */
2594     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2595     /* send mat->i,j to others, and recv from other's */
2596     /*------------------------------------------------*/
2597     for (i=0; i<nsends; i++) {
2598       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2599       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2600     }
2601     /* wait on receives of mat->i,j */
2602     /*------------------------------*/
2603     count = nrecvs;
2604     while (count) {
2605       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2606       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2607       count--;
2608     }
2609     /* wait on sends of mat->i,j */
2610     /*---------------------------*/
2611     if (nsends) {
2612       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2613     }
2614   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2615 
2616   /* post receives, send and receive mat->a */
2617   /*----------------------------------------*/
2618   for (imdex=0; imdex<nrecvs; imdex++) {
2619     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2620   }
2621   for (i=0; i<nsends; i++) {
2622     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2623   }
2624   count = nrecvs;
2625   while (count) {
2626     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2627     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2628     count--;
2629   }
2630   if (nsends) {
2631     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2632   }
2633 
2634   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2635 
2636   /* create redundant matrix */
2637   /*-------------------------*/
2638   if (reuse == MAT_INITIAL_MATRIX) {
2639     const PetscInt *range;
2640     PetscInt       rstart_sub,rend_sub,mloc_sub;
2641 
2642     /* compute rownz_max for preallocation */
2643     for (imdex=0; imdex<nrecvs; imdex++) {
2644       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2645       rptr = rbuf_j[imdex];
2646       for (i=0; i<j; i++) {
2647         ncols = rptr[i+1] - rptr[i];
2648         if (rownz_max < ncols) rownz_max = ncols;
2649       }
2650     }
2651 
2652     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2653 
2654     /* get local size of redundant matrix
2655        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2656     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2657     rstart_sub = range[nsubcomm*subrank];
2658     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2659       rend_sub = range[nsubcomm*(subrank+1)];
2660     } else {
2661       rend_sub = mat->rmap->N;
2662     }
2663     mloc_sub = rend_sub - rstart_sub;
2664 
2665     if (M == N) {
2666       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2667     } else { /* non-square matrix */
2668       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2669     }
2670     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2671     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2672     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2673     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2674   } else {
2675     C = *matredundant;
2676   }
2677 
2678   /* insert local matrix entries */
2679   rptr = sbuf_j;
2680   cols = sbuf_j + rend-rstart + 1;
2681   vals = sbuf_a;
2682   for (i=0; i<rend-rstart; i++) {
2683     row   = i + rstart;
2684     ncols = rptr[i+1] - rptr[i];
2685     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2686     vals += ncols;
2687     cols += ncols;
2688   }
2689   /* insert received matrix entries */
2690   for (imdex=0; imdex<nrecvs; imdex++) {
2691     rstart = rowrange[recv_rank[imdex]];
2692     rend   = rowrange[recv_rank[imdex]+1];
2693     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2694     rptr   = rbuf_j[imdex];
2695     cols   = rbuf_j[imdex] + rend-rstart + 1;
2696     vals   = rbuf_a[imdex];
2697     for (i=0; i<rend-rstart; i++) {
2698       row   = i + rstart;
2699       ncols = rptr[i+1] - rptr[i];
2700       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2701       vals += ncols;
2702       cols += ncols;
2703     }
2704   }
2705   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707 
2708   if (reuse == MAT_INITIAL_MATRIX) {
2709     *matredundant = C;
2710 
2711     /* create a supporting struct and attach it to C for reuse */
2712     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2713     if (subsize == 1) {
2714       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2715       c->redundant = redund;
2716     } else {
2717       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2718       c->redundant = redund;
2719     }
2720 
2721     redund->nzlocal   = nzlocal;
2722     redund->nsends    = nsends;
2723     redund->nrecvs    = nrecvs;
2724     redund->send_rank = send_rank;
2725     redund->recv_rank = recv_rank;
2726     redund->sbuf_nz   = sbuf_nz;
2727     redund->rbuf_nz   = rbuf_nz;
2728     redund->sbuf_j    = sbuf_j;
2729     redund->sbuf_a    = sbuf_a;
2730     redund->rbuf_j    = rbuf_j;
2731     redund->rbuf_a    = rbuf_a;
2732     redund->psubcomm  = NULL;
2733 
2734     redund->Destroy = C->ops->destroy;
2735     C->ops->destroy = MatDestroy_MatRedundant;
2736   }
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2742 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2743 {
2744   PetscErrorCode ierr;
2745   MPI_Comm       comm;
2746   PetscMPIInt    size,subsize;
2747   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2748   Mat_Redundant  *redund=NULL;
2749   PetscSubcomm   psubcomm=NULL;
2750   MPI_Comm       subcomm_in=subcomm;
2751   Mat            *matseq;
2752   IS             isrow,iscol;
2753 
2754   PetscFunctionBegin;
2755   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2756     if (reuse ==  MAT_INITIAL_MATRIX) {
2757       /* create psubcomm, then get subcomm */
2758       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2759       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2760       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2761 
2762       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2763       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2764       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2765       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2766       subcomm = psubcomm->comm;
2767     } else { /* retrieve psubcomm and subcomm */
2768       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2769       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2770       if (subsize == 1) {
2771         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2772         redund = c->redundant;
2773       } else {
2774         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2775         redund = c->redundant;
2776       }
2777       psubcomm = redund->psubcomm;
2778     }
2779     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2780       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2781       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2782         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2783         if (subsize == 1) {
2784           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2785           c->redundant->psubcomm = psubcomm;
2786         } else {
2787           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2788           c->redundant->psubcomm = psubcomm ;
2789         }
2790       }
2791       PetscFunctionReturn(0);
2792     }
2793   }
2794 
2795   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2796   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2797   if (reuse == MAT_INITIAL_MATRIX) {
2798     /* create a local sequential matrix matseq[0] */
2799     mloc_sub = PETSC_DECIDE;
2800     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2801     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2802     rstart = rend - mloc_sub;
2803     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2804     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2805   } else { /* reuse == MAT_REUSE_MATRIX */
2806     if (subsize == 1) {
2807       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2808       redund = c->redundant;
2809     } else {
2810       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2811       redund = c->redundant;
2812     }
2813 
2814     isrow  = redund->isrow;
2815     iscol  = redund->iscol;
2816     matseq = redund->matseq;
2817   }
2818   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2819   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2820 
2821   if (reuse == MAT_INITIAL_MATRIX) {
2822     /* create a supporting struct and attach it to C for reuse */
2823     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2824     if (subsize == 1) {
2825       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2826       c->redundant = redund;
2827     } else {
2828       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2829       c->redundant = redund;
2830     }
2831     redund->isrow    = isrow;
2832     redund->iscol    = iscol;
2833     redund->matseq   = matseq;
2834     redund->psubcomm = psubcomm;
2835     redund->Destroy               = (*matredundant)->ops->destroy;
2836     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2837   }
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2843 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2844 {
2845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2846   PetscErrorCode ierr;
2847   PetscInt       i,*idxb = 0;
2848   PetscScalar    *va,*vb;
2849   Vec            vtmp;
2850 
2851   PetscFunctionBegin;
2852   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2853   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2854   if (idx) {
2855     for (i=0; i<A->rmap->n; i++) {
2856       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2857     }
2858   }
2859 
2860   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2861   if (idx) {
2862     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2863   }
2864   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2865   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2866 
2867   for (i=0; i<A->rmap->n; i++) {
2868     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2869       va[i] = vb[i];
2870       if (idx) idx[i] = a->garray[idxb[i]];
2871     }
2872   }
2873 
2874   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2875   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2876   ierr = PetscFree(idxb);CHKERRQ(ierr);
2877   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2883 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2884 {
2885   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2886   PetscErrorCode ierr;
2887   PetscInt       i,*idxb = 0;
2888   PetscScalar    *va,*vb;
2889   Vec            vtmp;
2890 
2891   PetscFunctionBegin;
2892   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2893   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2894   if (idx) {
2895     for (i=0; i<A->cmap->n; i++) {
2896       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2897     }
2898   }
2899 
2900   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2901   if (idx) {
2902     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2903   }
2904   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2905   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2906 
2907   for (i=0; i<A->rmap->n; i++) {
2908     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2909       va[i] = vb[i];
2910       if (idx) idx[i] = a->garray[idxb[i]];
2911     }
2912   }
2913 
2914   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2916   ierr = PetscFree(idxb);CHKERRQ(ierr);
2917   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2923 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2926   PetscInt       n      = A->rmap->n;
2927   PetscInt       cstart = A->cmap->rstart;
2928   PetscInt       *cmap  = mat->garray;
2929   PetscInt       *diagIdx, *offdiagIdx;
2930   Vec            diagV, offdiagV;
2931   PetscScalar    *a, *diagA, *offdiagA;
2932   PetscInt       r;
2933   PetscErrorCode ierr;
2934 
2935   PetscFunctionBegin;
2936   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2938   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2940   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2941   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2942   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2943   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2944   for (r = 0; r < n; ++r) {
2945     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2946       a[r]   = diagA[r];
2947       idx[r] = cstart + diagIdx[r];
2948     } else {
2949       a[r]   = offdiagA[r];
2950       idx[r] = cmap[offdiagIdx[r]];
2951     }
2952   }
2953   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2955   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2956   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2957   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2958   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2964 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2965 {
2966   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2967   PetscInt       n      = A->rmap->n;
2968   PetscInt       cstart = A->cmap->rstart;
2969   PetscInt       *cmap  = mat->garray;
2970   PetscInt       *diagIdx, *offdiagIdx;
2971   Vec            diagV, offdiagV;
2972   PetscScalar    *a, *diagA, *offdiagA;
2973   PetscInt       r;
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2979   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2981   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2982   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2983   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2984   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2985   for (r = 0; r < n; ++r) {
2986     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2987       a[r]   = diagA[r];
2988       idx[r] = cstart + diagIdx[r];
2989     } else {
2990       a[r]   = offdiagA[r];
2991       idx[r] = cmap[offdiagIdx[r]];
2992     }
2993   }
2994   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2996   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2997   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2998   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2999   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3000   PetscFunctionReturn(0);
3001 }
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3005 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3006 {
3007   PetscErrorCode ierr;
3008   Mat            *dummy;
3009 
3010   PetscFunctionBegin;
3011   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3012   *newmat = *dummy;
3013   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3019 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3020 {
3021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3022   PetscErrorCode ierr;
3023 
3024   PetscFunctionBegin;
3025   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3026   PetscFunctionReturn(0);
3027 }
3028 
3029 #undef __FUNCT__
3030 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3031 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3032 {
3033   PetscErrorCode ierr;
3034   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3035 
3036   PetscFunctionBegin;
3037   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3038   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3039   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 /* -------------------------------------------------------------------*/
3045 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3046                                        MatGetRow_MPIAIJ,
3047                                        MatRestoreRow_MPIAIJ,
3048                                        MatMult_MPIAIJ,
3049                                 /* 4*/ MatMultAdd_MPIAIJ,
3050                                        MatMultTranspose_MPIAIJ,
3051                                        MatMultTransposeAdd_MPIAIJ,
3052 #if defined(PETSC_HAVE_PBGL)
3053                                        MatSolve_MPIAIJ,
3054 #else
3055                                        0,
3056 #endif
3057                                        0,
3058                                        0,
3059                                 /*10*/ 0,
3060                                        0,
3061                                        0,
3062                                        MatSOR_MPIAIJ,
3063                                        MatTranspose_MPIAIJ,
3064                                 /*15*/ MatGetInfo_MPIAIJ,
3065                                        MatEqual_MPIAIJ,
3066                                        MatGetDiagonal_MPIAIJ,
3067                                        MatDiagonalScale_MPIAIJ,
3068                                        MatNorm_MPIAIJ,
3069                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3070                                        MatAssemblyEnd_MPIAIJ,
3071                                        MatSetOption_MPIAIJ,
3072                                        MatZeroEntries_MPIAIJ,
3073                                 /*24*/ MatZeroRows_MPIAIJ,
3074                                        0,
3075 #if defined(PETSC_HAVE_PBGL)
3076                                        0,
3077 #else
3078                                        0,
3079 #endif
3080                                        0,
3081                                        0,
3082                                 /*29*/ MatSetUp_MPIAIJ,
3083 #if defined(PETSC_HAVE_PBGL)
3084                                        0,
3085 #else
3086                                        0,
3087 #endif
3088                                        0,
3089                                        0,
3090                                        0,
3091                                 /*34*/ MatDuplicate_MPIAIJ,
3092                                        0,
3093                                        0,
3094                                        0,
3095                                        0,
3096                                 /*39*/ MatAXPY_MPIAIJ,
3097                                        MatGetSubMatrices_MPIAIJ,
3098                                        MatIncreaseOverlap_MPIAIJ,
3099                                        MatGetValues_MPIAIJ,
3100                                        MatCopy_MPIAIJ,
3101                                 /*44*/ MatGetRowMax_MPIAIJ,
3102                                        MatScale_MPIAIJ,
3103                                        0,
3104                                        0,
3105                                        MatZeroRowsColumns_MPIAIJ,
3106                                 /*49*/ MatSetRandom_MPIAIJ,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                        0,
3111                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3112                                        0,
3113                                        MatSetUnfactored_MPIAIJ,
3114                                        MatPermute_MPIAIJ,
3115                                        0,
3116                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3117                                        MatDestroy_MPIAIJ,
3118                                        MatView_MPIAIJ,
3119                                        0,
3120                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3121                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3122                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3123                                        0,
3124                                        0,
3125                                        0,
3126                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3127                                        MatGetRowMinAbs_MPIAIJ,
3128                                        0,
3129                                        MatSetColoring_MPIAIJ,
3130                                        0,
3131                                        MatSetValuesAdifor_MPIAIJ,
3132                                 /*75*/ MatFDColoringApply_AIJ,
3133                                        0,
3134                                        0,
3135                                        0,
3136                                        MatFindZeroDiagonals_MPIAIJ,
3137                                 /*80*/ 0,
3138                                        0,
3139                                        0,
3140                                 /*83*/ MatLoad_MPIAIJ,
3141                                        0,
3142                                        0,
3143                                        0,
3144                                        0,
3145                                        0,
3146                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3147                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3148                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3149                                        MatPtAP_MPIAIJ_MPIAIJ,
3150                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3151                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                 /*99*/ 0,
3157                                        0,
3158                                        0,
3159                                        MatConjugate_MPIAIJ,
3160                                        0,
3161                                 /*104*/MatSetValuesRow_MPIAIJ,
3162                                        MatRealPart_MPIAIJ,
3163                                        MatImaginaryPart_MPIAIJ,
3164                                        0,
3165                                        0,
3166                                 /*109*/0,
3167                                        MatGetRedundantMatrix_MPIAIJ,
3168                                        MatGetRowMin_MPIAIJ,
3169                                        0,
3170                                        0,
3171                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3172                                        0,
3173                                        0,
3174                                        0,
3175                                        0,
3176                                 /*119*/0,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                        MatGetMultiProcBlock_MPIAIJ,
3181                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3182                                        MatGetColumnNorms_MPIAIJ,
3183                                        MatInvertBlockDiagonal_MPIAIJ,
3184                                        0,
3185                                        MatGetSubMatricesParallel_MPIAIJ,
3186                                 /*129*/0,
3187                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3188                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3189                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3190                                        0,
3191                                 /*134*/0,
3192                                        0,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                 /*139*/0,
3197                                        0,
3198                                        0,
3199                                        MatFDColoringSetUp_MPIXAIJ
3200 };
3201 
3202 /* ----------------------------------------------------------------------------------------*/
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3206 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3207 {
3208   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3213   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 #undef __FUNCT__
3218 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3219 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3220 {
3221   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3226   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3232 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3233 {
3234   Mat_MPIAIJ     *b;
3235   PetscErrorCode ierr;
3236 
3237   PetscFunctionBegin;
3238   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3240   b = (Mat_MPIAIJ*)B->data;
3241 
3242   if (!B->preallocated) {
3243     /* Explicitly create 2 MATSEQAIJ matrices. */
3244     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3245     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3246     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3247     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3248     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3249     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3250     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3251     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3252     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3253     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3254   }
3255 
3256   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3257   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3258   B->preallocated = PETSC_TRUE;
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3264 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3265 {
3266   Mat            mat;
3267   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3268   PetscErrorCode ierr;
3269 
3270   PetscFunctionBegin;
3271   *newmat = 0;
3272   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3273   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3274   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3275   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3276   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3277   a       = (Mat_MPIAIJ*)mat->data;
3278 
3279   mat->factortype   = matin->factortype;
3280   mat->assembled    = PETSC_TRUE;
3281   mat->insertmode   = NOT_SET_VALUES;
3282   mat->preallocated = PETSC_TRUE;
3283 
3284   a->size         = oldmat->size;
3285   a->rank         = oldmat->rank;
3286   a->donotstash   = oldmat->donotstash;
3287   a->roworiented  = oldmat->roworiented;
3288   a->rowindices   = 0;
3289   a->rowvalues    = 0;
3290   a->getrowactive = PETSC_FALSE;
3291 
3292   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3293   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3294 
3295   if (oldmat->colmap) {
3296 #if defined(PETSC_USE_CTABLE)
3297     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3298 #else
3299     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3300     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3301     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3302 #endif
3303   } else a->colmap = 0;
3304   if (oldmat->garray) {
3305     PetscInt len;
3306     len  = oldmat->B->cmap->n;
3307     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3308     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3309     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3310   } else a->garray = 0;
3311 
3312   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3313   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3314   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3315   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3316   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3317   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3318   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3319   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3320   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3321   *newmat = mat;
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 
3326 
3327 #undef __FUNCT__
3328 #define __FUNCT__ "MatLoad_MPIAIJ"
3329 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3330 {
3331   PetscScalar    *vals,*svals;
3332   MPI_Comm       comm;
3333   PetscErrorCode ierr;
3334   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3335   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3336   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3337   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3338   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3339   int            fd;
3340   PetscInt       bs = 1;
3341 
3342   PetscFunctionBegin;
3343   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3345   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3346   if (!rank) {
3347     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3348     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3349     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3350   }
3351 
3352   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3353   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3354   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3355 
3356   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3357 
3358   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3359   M    = header[1]; N = header[2];
3360   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3361   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3362   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3363 
3364   /* If global sizes are set, check if they are consistent with that given in the file */
3365   if (sizesset) {
3366     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3367   }
3368   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3369   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3370 
3371   /* determine ownership of all (block) rows */
3372   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3373   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3374   else m = newMat->rmap->n; /* Set by user */
3375 
3376   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3377   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3378 
3379   /* First process needs enough room for process with most rows */
3380   if (!rank) {
3381     mmax = rowners[1];
3382     for (i=2; i<=size; i++) {
3383       mmax = PetscMax(mmax, rowners[i]);
3384     }
3385   } else mmax = -1;             /* unused, but compilers complain */
3386 
3387   rowners[0] = 0;
3388   for (i=2; i<=size; i++) {
3389     rowners[i] += rowners[i-1];
3390   }
3391   rstart = rowners[rank];
3392   rend   = rowners[rank+1];
3393 
3394   /* distribute row lengths to all processors */
3395   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3396   if (!rank) {
3397     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3398     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3399     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3400     for (j=0; j<m; j++) {
3401       procsnz[0] += ourlens[j];
3402     }
3403     for (i=1; i<size; i++) {
3404       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3405       /* calculate the number of nonzeros on each processor */
3406       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3407         procsnz[i] += rowlengths[j];
3408       }
3409       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3410     }
3411     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412   } else {
3413     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3414   }
3415 
3416   if (!rank) {
3417     /* determine max buffer needed and allocate it */
3418     maxnz = 0;
3419     for (i=0; i<size; i++) {
3420       maxnz = PetscMax(maxnz,procsnz[i]);
3421     }
3422     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3423 
3424     /* read in my part of the matrix column indices  */
3425     nz   = procsnz[0];
3426     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3428 
3429     /* read in every one elses and ship off */
3430     for (i=1; i<size; i++) {
3431       nz   = procsnz[i];
3432       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3433       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3434     }
3435     ierr = PetscFree(cols);CHKERRQ(ierr);
3436   } else {
3437     /* determine buffer space needed for message */
3438     nz = 0;
3439     for (i=0; i<m; i++) {
3440       nz += ourlens[i];
3441     }
3442     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3443 
3444     /* receive message of column indices*/
3445     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446   }
3447 
3448   /* determine column ownership if matrix is not square */
3449   if (N != M) {
3450     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3451     else n = newMat->cmap->n;
3452     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3453     cstart = cend - n;
3454   } else {
3455     cstart = rstart;
3456     cend   = rend;
3457     n      = cend - cstart;
3458   }
3459 
3460   /* loop over local rows, determining number of off diagonal entries */
3461   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3462   jj   = 0;
3463   for (i=0; i<m; i++) {
3464     for (j=0; j<ourlens[i]; j++) {
3465       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3466       jj++;
3467     }
3468   }
3469 
3470   for (i=0; i<m; i++) {
3471     ourlens[i] -= offlens[i];
3472   }
3473   if (!sizesset) {
3474     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3475   }
3476 
3477   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3478 
3479   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3480 
3481   for (i=0; i<m; i++) {
3482     ourlens[i] += offlens[i];
3483   }
3484 
3485   if (!rank) {
3486     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3487 
3488     /* read in my part of the matrix numerical values  */
3489     nz   = procsnz[0];
3490     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491 
3492     /* insert into matrix */
3493     jj      = rstart;
3494     smycols = mycols;
3495     svals   = vals;
3496     for (i=0; i<m; i++) {
3497       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3498       smycols += ourlens[i];
3499       svals   += ourlens[i];
3500       jj++;
3501     }
3502 
3503     /* read in other processors and ship out */
3504     for (i=1; i<size; i++) {
3505       nz   = procsnz[i];
3506       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3510   } else {
3511     /* receive numeric values */
3512     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3513 
3514     /* receive message of values*/
3515     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3516 
3517     /* insert into matrix */
3518     jj      = rstart;
3519     smycols = mycols;
3520     svals   = vals;
3521     for (i=0; i<m; i++) {
3522       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3523       smycols += ourlens[i];
3524       svals   += ourlens[i];
3525       jj++;
3526     }
3527   }
3528   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3529   ierr = PetscFree(vals);CHKERRQ(ierr);
3530   ierr = PetscFree(mycols);CHKERRQ(ierr);
3531   ierr = PetscFree(rowners);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3540 {
3541   PetscErrorCode ierr;
3542   IS             iscol_local;
3543   PetscInt       csize;
3544 
3545   PetscFunctionBegin;
3546   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3547   if (call == MAT_REUSE_MATRIX) {
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3549     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3550   } else {
3551     PetscInt cbs;
3552     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3553     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3554     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3555   }
3556   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3557   if (call == MAT_INITIAL_MATRIX) {
3558     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3559     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3560   }
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3567 /*
3568     Not great since it makes two copies of the submatrix, first an SeqAIJ
3569   in local and then by concatenating the local matrices the end result.
3570   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3571 
3572   Note: This requires a sequential iscol with all indices.
3573 */
3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3575 {
3576   PetscErrorCode ierr;
3577   PetscMPIInt    rank,size;
3578   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3579   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3580   PetscBool      allcolumns, colflag;
3581   Mat            M,Mreuse;
3582   MatScalar      *vwork,*aa;
3583   MPI_Comm       comm;
3584   Mat_SeqAIJ     *aij;
3585 
3586   PetscFunctionBegin;
3587   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3590 
3591   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3592   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3593   if (colflag && ncol == mat->cmap->N) {
3594     allcolumns = PETSC_TRUE;
3595   } else {
3596     allcolumns = PETSC_FALSE;
3597   }
3598   if (call ==  MAT_REUSE_MATRIX) {
3599     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3600     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3601     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3602   } else {
3603     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3604   }
3605 
3606   /*
3607       m - number of local rows
3608       n - number of columns (same on all processors)
3609       rstart - first row in new global matrix generated
3610   */
3611   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3612   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3613   if (call == MAT_INITIAL_MATRIX) {
3614     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3615     ii  = aij->i;
3616     jj  = aij->j;
3617 
3618     /*
3619         Determine the number of non-zeros in the diagonal and off-diagonal
3620         portions of the matrix in order to do correct preallocation
3621     */
3622 
3623     /* first get start and end of "diagonal" columns */
3624     if (csize == PETSC_DECIDE) {
3625       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3626       if (mglobal == n) { /* square matrix */
3627         nlocal = m;
3628       } else {
3629         nlocal = n/size + ((n % size) > rank);
3630       }
3631     } else {
3632       nlocal = csize;
3633     }
3634     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3635     rstart = rend - nlocal;
3636     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3637 
3638     /* next, compute all the lengths */
3639     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3640     olens = dlens + m;
3641     for (i=0; i<m; i++) {
3642       jend = ii[i+1] - ii[i];
3643       olen = 0;
3644       dlen = 0;
3645       for (j=0; j<jend; j++) {
3646         if (*jj < rstart || *jj >= rend) olen++;
3647         else dlen++;
3648         jj++;
3649       }
3650       olens[i] = olen;
3651       dlens[i] = dlen;
3652     }
3653     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3654     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3655     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3656     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3657     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3658     ierr = PetscFree(dlens);CHKERRQ(ierr);
3659   } else {
3660     PetscInt ml,nl;
3661 
3662     M    = *newmat;
3663     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3664     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3665     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3666     /*
3667          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3668        rather than the slower MatSetValues().
3669     */
3670     M->was_assembled = PETSC_TRUE;
3671     M->assembled     = PETSC_FALSE;
3672   }
3673   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3674   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3675   ii   = aij->i;
3676   jj   = aij->j;
3677   aa   = aij->a;
3678   for (i=0; i<m; i++) {
3679     row   = rstart + i;
3680     nz    = ii[i+1] - ii[i];
3681     cwork = jj;     jj += nz;
3682     vwork = aa;     aa += nz;
3683     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3684   }
3685 
3686   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   *newmat = M;
3689 
3690   /* save submatrix used in processor for next request */
3691   if (call ==  MAT_INITIAL_MATRIX) {
3692     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3693     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3694   }
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3700 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3701 {
3702   PetscInt       m,cstart, cend,j,nnz,i,d;
3703   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3704   const PetscInt *JJ;
3705   PetscScalar    *values;
3706   PetscErrorCode ierr;
3707 
3708   PetscFunctionBegin;
3709   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3710 
3711   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3712   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3713   m      = B->rmap->n;
3714   cstart = B->cmap->rstart;
3715   cend   = B->cmap->rend;
3716   rstart = B->rmap->rstart;
3717 
3718   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3719 
3720 #if defined(PETSC_USE_DEBUGGING)
3721   for (i=0; i<m; i++) {
3722     nnz = Ii[i+1]- Ii[i];
3723     JJ  = J + Ii[i];
3724     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3725     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3726     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3727   }
3728 #endif
3729 
3730   for (i=0; i<m; i++) {
3731     nnz     = Ii[i+1]- Ii[i];
3732     JJ      = J + Ii[i];
3733     nnz_max = PetscMax(nnz_max,nnz);
3734     d       = 0;
3735     for (j=0; j<nnz; j++) {
3736       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3737     }
3738     d_nnz[i] = d;
3739     o_nnz[i] = nnz - d;
3740   }
3741   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3742   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3743 
3744   if (v) values = (PetscScalar*)v;
3745   else {
3746     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3747   }
3748 
3749   for (i=0; i<m; i++) {
3750     ii   = i + rstart;
3751     nnz  = Ii[i+1]- Ii[i];
3752     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3753   }
3754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756 
3757   if (!v) {
3758     ierr = PetscFree(values);CHKERRQ(ierr);
3759   }
3760   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3766 /*@
3767    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3768    (the default parallel PETSc format).
3769 
3770    Collective on MPI_Comm
3771 
3772    Input Parameters:
3773 +  B - the matrix
3774 .  i - the indices into j for the start of each local row (starts with zero)
3775 .  j - the column indices for each local row (starts with zero)
3776 -  v - optional values in the matrix
3777 
3778    Level: developer
3779 
3780    Notes:
3781        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3782      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3783      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3784 
3785        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3786 
3787        The format which is used for the sparse matrix input, is equivalent to a
3788     row-major ordering.. i.e for the following matrix, the input data expected is
3789     as shown:
3790 
3791         1 0 0
3792         2 0 3     P0
3793        -------
3794         4 5 6     P1
3795 
3796      Process0 [P0]: rows_owned=[0,1]
3797         i =  {0,1,3}  [size = nrow+1  = 2+1]
3798         j =  {0,0,2}  [size = nz = 6]
3799         v =  {1,2,3}  [size = nz = 6]
3800 
3801      Process1 [P1]: rows_owned=[2]
3802         i =  {0,3}    [size = nrow+1  = 1+1]
3803         j =  {0,1,2}  [size = nz = 6]
3804         v =  {4,5,6}  [size = nz = 6]
3805 
3806 .keywords: matrix, aij, compressed row, sparse, parallel
3807 
3808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3809           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3810 @*/
3811 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3812 {
3813   PetscErrorCode ierr;
3814 
3815   PetscFunctionBegin;
3816   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 #undef __FUNCT__
3821 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3822 /*@C
3823    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3824    (the default parallel PETSc format).  For good matrix assembly performance
3825    the user should preallocate the matrix storage by setting the parameters
3826    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3827    performance can be increased by more than a factor of 50.
3828 
3829    Collective on MPI_Comm
3830 
3831    Input Parameters:
3832 +  A - the matrix
3833 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3834            (same value is used for all local rows)
3835 .  d_nnz - array containing the number of nonzeros in the various rows of the
3836            DIAGONAL portion of the local submatrix (possibly different for each row)
3837            or NULL, if d_nz is used to specify the nonzero structure.
3838            The size of this array is equal to the number of local rows, i.e 'm'.
3839            For matrices that will be factored, you must leave room for (and set)
3840            the diagonal entry even if it is zero.
3841 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3842            submatrix (same value is used for all local rows).
3843 -  o_nnz - array containing the number of nonzeros in the various rows of the
3844            OFF-DIAGONAL portion of the local submatrix (possibly different for
3845            each row) or NULL, if o_nz is used to specify the nonzero
3846            structure. The size of this array is equal to the number
3847            of local rows, i.e 'm'.
3848 
3849    If the *_nnz parameter is given then the *_nz parameter is ignored
3850 
3851    The AIJ format (also called the Yale sparse matrix format or
3852    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3853    storage.  The stored row and column indices begin with zero.
3854    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3855 
3856    The parallel matrix is partitioned such that the first m0 rows belong to
3857    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3858    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3859 
3860    The DIAGONAL portion of the local submatrix of a processor can be defined
3861    as the submatrix which is obtained by extraction the part corresponding to
3862    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3863    first row that belongs to the processor, r2 is the last row belonging to
3864    the this processor, and c1-c2 is range of indices of the local part of a
3865    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3866    common case of a square matrix, the row and column ranges are the same and
3867    the DIAGONAL part is also square. The remaining portion of the local
3868    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3869 
3870    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3871 
3872    You can call MatGetInfo() to get information on how effective the preallocation was;
3873    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3874    You can also run with the option -info and look for messages with the string
3875    malloc in them to see if additional memory allocation was needed.
3876 
3877    Example usage:
3878 
3879    Consider the following 8x8 matrix with 34 non-zero values, that is
3880    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3881    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3882    as follows:
3883 
3884 .vb
3885             1  2  0  |  0  3  0  |  0  4
3886     Proc0   0  5  6  |  7  0  0  |  8  0
3887             9  0 10  | 11  0  0  | 12  0
3888     -------------------------------------
3889            13  0 14  | 15 16 17  |  0  0
3890     Proc1   0 18  0  | 19 20 21  |  0  0
3891             0  0  0  | 22 23  0  | 24  0
3892     -------------------------------------
3893     Proc2  25 26 27  |  0  0 28  | 29  0
3894            30  0  0  | 31 32 33  |  0 34
3895 .ve
3896 
3897    This can be represented as a collection of submatrices as:
3898 
3899 .vb
3900       A B C
3901       D E F
3902       G H I
3903 .ve
3904 
3905    Where the submatrices A,B,C are owned by proc0, D,E,F are
3906    owned by proc1, G,H,I are owned by proc2.
3907 
3908    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3909    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3910    The 'M','N' parameters are 8,8, and have the same values on all procs.
3911 
3912    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3913    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3914    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3915    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3916    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3917    matrix, ans [DF] as another SeqAIJ matrix.
3918 
3919    When d_nz, o_nz parameters are specified, d_nz storage elements are
3920    allocated for every row of the local diagonal submatrix, and o_nz
3921    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3922    One way to choose d_nz and o_nz is to use the max nonzerors per local
3923    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3924    In this case, the values of d_nz,o_nz are:
3925 .vb
3926      proc0 : dnz = 2, o_nz = 2
3927      proc1 : dnz = 3, o_nz = 2
3928      proc2 : dnz = 1, o_nz = 4
3929 .ve
3930    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3931    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3932    for proc3. i.e we are using 12+15+10=37 storage locations to store
3933    34 values.
3934 
3935    When d_nnz, o_nnz parameters are specified, the storage is specified
3936    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3937    In the above case the values for d_nnz,o_nnz are:
3938 .vb
3939      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3940      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3941      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3942 .ve
3943    Here the space allocated is sum of all the above values i.e 34, and
3944    hence pre-allocation is perfect.
3945 
3946    Level: intermediate
3947 
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949 
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3952 @*/
3953 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3954 {
3955   PetscErrorCode ierr;
3956 
3957   PetscFunctionBegin;
3958   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3959   PetscValidType(B,1);
3960   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 #undef __FUNCT__
3965 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3966 /*@
3967      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3968          CSR format the local rows.
3969 
3970    Collective on MPI_Comm
3971 
3972    Input Parameters:
3973 +  comm - MPI communicator
3974 .  m - number of local rows (Cannot be PETSC_DECIDE)
3975 .  n - This value should be the same as the local size used in creating the
3976        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3977        calculated if N is given) For square matrices n is almost always m.
3978 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3979 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3980 .   i - row indices
3981 .   j - column indices
3982 -   a - matrix values
3983 
3984    Output Parameter:
3985 .   mat - the matrix
3986 
3987    Level: intermediate
3988 
3989    Notes:
3990        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3991      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3992      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3993 
3994        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3995 
3996        The format which is used for the sparse matrix input, is equivalent to a
3997     row-major ordering.. i.e for the following matrix, the input data expected is
3998     as shown:
3999 
4000         1 0 0
4001         2 0 3     P0
4002        -------
4003         4 5 6     P1
4004 
4005      Process0 [P0]: rows_owned=[0,1]
4006         i =  {0,1,3}  [size = nrow+1  = 2+1]
4007         j =  {0,0,2}  [size = nz = 6]
4008         v =  {1,2,3}  [size = nz = 6]
4009 
4010      Process1 [P1]: rows_owned=[2]
4011         i =  {0,3}    [size = nrow+1  = 1+1]
4012         j =  {0,1,2}  [size = nz = 6]
4013         v =  {4,5,6}  [size = nz = 6]
4014 
4015 .keywords: matrix, aij, compressed row, sparse, parallel
4016 
4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4018           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4019 @*/
4020 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4021 {
4022   PetscErrorCode ierr;
4023 
4024   PetscFunctionBegin;
4025   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4026   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4027   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4028   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4029   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4030   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4031   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateAIJ"
4037 /*@C
4038    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4039    (the default parallel PETSc format).  For good matrix assembly performance
4040    the user should preallocate the matrix storage by setting the parameters
4041    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4042    performance can be increased by more than a factor of 50.
4043 
4044    Collective on MPI_Comm
4045 
4046    Input Parameters:
4047 +  comm - MPI communicator
4048 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4049            This value should be the same as the local size used in creating the
4050            y vector for the matrix-vector product y = Ax.
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4057            (same value is used for all local rows)
4058 .  d_nnz - array containing the number of nonzeros in the various rows of the
4059            DIAGONAL portion of the local submatrix (possibly different for each row)
4060            or NULL, if d_nz is used to specify the nonzero structure.
4061            The size of this array is equal to the number of local rows, i.e 'm'.
4062 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4063            submatrix (same value is used for all local rows).
4064 -  o_nnz - array containing the number of nonzeros in the various rows of the
4065            OFF-DIAGONAL portion of the local submatrix (possibly different for
4066            each row) or NULL, if o_nz is used to specify the nonzero
4067            structure. The size of this array is equal to the number
4068            of local rows, i.e 'm'.
4069 
4070    Output Parameter:
4071 .  A - the matrix
4072 
4073    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4074    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4075    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4076 
4077    Notes:
4078    If the *_nnz parameter is given then the *_nz parameter is ignored
4079 
4080    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4081    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4082    storage requirements for this matrix.
4083 
4084    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4085    processor than it must be used on all processors that share the object for
4086    that argument.
4087 
4088    The user MUST specify either the local or global matrix dimensions
4089    (possibly both).
4090 
4091    The parallel matrix is partitioned across processors such that the
4092    first m0 rows belong to process 0, the next m1 rows belong to
4093    process 1, the next m2 rows belong to process 2 etc.. where
4094    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4095    values corresponding to [m x N] submatrix.
4096 
4097    The columns are logically partitioned with the n0 columns belonging
4098    to 0th partition, the next n1 columns belonging to the next
4099    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4100 
4101    The DIAGONAL portion of the local submatrix on any given processor
4102    is the submatrix corresponding to the rows and columns m,n
4103    corresponding to the given processor. i.e diagonal matrix on
4104    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4105    etc. The remaining portion of the local submatrix [m x (N-n)]
4106    constitute the OFF-DIAGONAL portion. The example below better
4107    illustrates this concept.
4108 
4109    For a square global matrix we define each processor's diagonal portion
4110    to be its local rows and the corresponding columns (a square submatrix);
4111    each processor's off-diagonal portion encompasses the remainder of the
4112    local matrix (a rectangular submatrix).
4113 
4114    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4115 
4116    When calling this routine with a single process communicator, a matrix of
4117    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4118    type of communicator, use the construction mechanism:
4119      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4120 
4121    By default, this format uses inodes (identical nodes) when possible.
4122    We search for consecutive rows with the same nonzero structure, thereby
4123    reusing matrix information to achieve increased efficiency.
4124 
4125    Options Database Keys:
4126 +  -mat_no_inode  - Do not use inodes
4127 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4128 -  -mat_aij_oneindex - Internally use indexing starting at 1
4129         rather than 0.  Note that when calling MatSetValues(),
4130         the user still MUST index entries starting at 0!
4131 
4132 
4133    Example usage:
4134 
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4138    as follows:
4139 
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152 
4153    This can be represented as a collection of submatrices as:
4154 
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160 
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163 
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167 
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4172    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4173    matrix, ans [DF] as another SeqAIJ matrix.
4174 
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4178    One way to choose d_nz and o_nz is to use the max nonzerors per local
4179    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : dnz = 2, o_nz = 2
4183      proc1 : dnz = 3, o_nz = 2
4184      proc2 : dnz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc3. i.e we are using 12+15+10=37 storage locations to store
4189    34 values.
4190 
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is sum of all the above values i.e 34, and
4200    hence pre-allocation is perfect.
4201 
4202    Level: intermediate
4203 
4204 .keywords: matrix, aij, compressed row, sparse, parallel
4205 
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4207           MPIAIJ, MatCreateMPIAIJWithArrays()
4208 @*/
4209 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4210 {
4211   PetscErrorCode ierr;
4212   PetscMPIInt    size;
4213 
4214   PetscFunctionBegin;
4215   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4216   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4218   if (size > 1) {
4219     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4220     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4221   } else {
4222     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4223     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4224   }
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 #undef __FUNCT__
4229 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4230 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4231 {
4232   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4233 
4234   PetscFunctionBegin;
4235   *Ad     = a->A;
4236   *Ao     = a->B;
4237   *colmap = a->garray;
4238   PetscFunctionReturn(0);
4239 }
4240 
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4243 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4244 {
4245   PetscErrorCode ierr;
4246   PetscInt       i;
4247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4248 
4249   PetscFunctionBegin;
4250   if (coloring->ctype == IS_COLORING_GLOBAL) {
4251     ISColoringValue *allcolors,*colors;
4252     ISColoring      ocoloring;
4253 
4254     /* set coloring for diagonal portion */
4255     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4256 
4257     /* set coloring for off-diagonal portion */
4258     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4259     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4260     for (i=0; i<a->B->cmap->n; i++) {
4261       colors[i] = allcolors[a->garray[i]];
4262     }
4263     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4264     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4265     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4266     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4267   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4268     ISColoringValue *colors;
4269     PetscInt        *larray;
4270     ISColoring      ocoloring;
4271 
4272     /* set coloring for diagonal portion */
4273     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4274     for (i=0; i<a->A->cmap->n; i++) {
4275       larray[i] = i + A->cmap->rstart;
4276     }
4277     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4278     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->A->cmap->n; i++) {
4280       colors[i] = coloring->colors[larray[i]];
4281     }
4282     ierr = PetscFree(larray);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286 
4287     /* set coloring for off-diagonal portion */
4288     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4289     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4290     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4291     for (i=0; i<a->B->cmap->n; i++) {
4292       colors[i] = coloring->colors[larray[i]];
4293     }
4294     ierr = PetscFree(larray);CHKERRQ(ierr);
4295     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4296     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4297     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4298   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 #undef __FUNCT__
4303 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4304 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4305 {
4306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4307   PetscErrorCode ierr;
4308 
4309   PetscFunctionBegin;
4310   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4311   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 #undef __FUNCT__
4316 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4317 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4318 {
4319   PetscErrorCode ierr;
4320   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4321   PetscInt       *indx;
4322 
4323   PetscFunctionBegin;
4324   /* This routine will ONLY return MPIAIJ type matrix */
4325   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4326   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4327   if (n == PETSC_DECIDE) {
4328     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4329   }
4330   /* Check sum(n) = N */
4331   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4332   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4333 
4334   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4335   rstart -= m;
4336 
4337   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4338   for (i=0; i<m; i++) {
4339     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4340     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4341     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4342   }
4343 
4344   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4345   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4346   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4347   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4348   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4349   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4350   PetscFunctionReturn(0);
4351 }
4352 
4353 #undef __FUNCT__
4354 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4355 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4356 {
4357   PetscErrorCode ierr;
4358   PetscInt       m,N,i,rstart,nnz,Ii;
4359   PetscInt       *indx;
4360   PetscScalar    *values;
4361 
4362   PetscFunctionBegin;
4363   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4364   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4365   for (i=0; i<m; i++) {
4366     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4367     Ii   = i + rstart;
4368     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4369     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4370   }
4371   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4372   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4373   PetscFunctionReturn(0);
4374 }
4375 
4376 #undef __FUNCT__
4377 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4378 /*@
4379       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4380                  matrices from each processor
4381 
4382     Collective on MPI_Comm
4383 
4384    Input Parameters:
4385 +    comm - the communicators the parallel matrix will live on
4386 .    inmat - the input sequential matrices
4387 .    n - number of local columns (or PETSC_DECIDE)
4388 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4389 
4390    Output Parameter:
4391 .    outmat - the parallel matrix generated
4392 
4393     Level: advanced
4394 
4395    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4396 
4397 @*/
4398 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4399 {
4400   PetscErrorCode ierr;
4401   PetscMPIInt    size;
4402 
4403   PetscFunctionBegin;
4404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4405   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4406   if (size == 1) {
4407     if (scall == MAT_INITIAL_MATRIX) {
4408       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4409     } else {
4410       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4411     }
4412   } else {
4413     if (scall == MAT_INITIAL_MATRIX) {
4414       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4415     }
4416     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4417   }
4418   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 #undef __FUNCT__
4423 #define __FUNCT__ "MatFileSplit"
4424 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4425 {
4426   PetscErrorCode    ierr;
4427   PetscMPIInt       rank;
4428   PetscInt          m,N,i,rstart,nnz;
4429   size_t            len;
4430   const PetscInt    *indx;
4431   PetscViewer       out;
4432   char              *name;
4433   Mat               B;
4434   const PetscScalar *values;
4435 
4436   PetscFunctionBegin;
4437   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4438   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4439   /* Should this be the type of the diagonal block of A? */
4440   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4441   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4442   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4443   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4444   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4445   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4446   for (i=0; i<m; i++) {
4447     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4448     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4449     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4450   }
4451   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453 
4454   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4455   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4456   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4457   sprintf(name,"%s.%d",outfile,rank);
4458   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4459   ierr = PetscFree(name);CHKERRQ(ierr);
4460   ierr = MatView(B,out);CHKERRQ(ierr);
4461   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4462   ierr = MatDestroy(&B);CHKERRQ(ierr);
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4467 #undef __FUNCT__
4468 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4469 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4470 {
4471   PetscErrorCode      ierr;
4472   Mat_Merge_SeqsToMPI *merge;
4473   PetscContainer      container;
4474 
4475   PetscFunctionBegin;
4476   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4477   if (container) {
4478     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4491     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4492     ierr = PetscFree(merge);CHKERRQ(ierr);
4493     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4494   }
4495   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 #include <../src/mat/utils/freespace.h>
4500 #include <petscbt.h>
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4504 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4505 {
4506   PetscErrorCode      ierr;
4507   MPI_Comm            comm;
4508   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4509   PetscMPIInt         size,rank,taga,*len_s;
4510   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4511   PetscInt            proc,m;
4512   PetscInt            **buf_ri,**buf_rj;
4513   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4514   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4515   MPI_Request         *s_waits,*r_waits;
4516   MPI_Status          *status;
4517   MatScalar           *aa=a->a;
4518   MatScalar           **abuf_r,*ba_i;
4519   Mat_Merge_SeqsToMPI *merge;
4520   PetscContainer      container;
4521 
4522   PetscFunctionBegin;
4523   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4524   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4525 
4526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4528 
4529   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4530   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4531 
4532   bi     = merge->bi;
4533   bj     = merge->bj;
4534   buf_ri = merge->buf_ri;
4535   buf_rj = merge->buf_rj;
4536 
4537   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4538   owners = merge->rowmap->range;
4539   len_s  = merge->len_s;
4540 
4541   /* send and recv matrix values */
4542   /*-----------------------------*/
4543   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4544   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4545 
4546   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4547   for (proc=0,k=0; proc<size; proc++) {
4548     if (!len_s[proc]) continue;
4549     i    = owners[proc];
4550     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4551     k++;
4552   }
4553 
4554   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4555   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4556   ierr = PetscFree(status);CHKERRQ(ierr);
4557 
4558   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4559   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4560 
4561   /* insert mat values of mpimat */
4562   /*----------------------------*/
4563   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4564   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4565 
4566   for (k=0; k<merge->nrecv; k++) {
4567     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4568     nrows       = *(buf_ri_k[k]);
4569     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4570     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4571   }
4572 
4573   /* set values of ba */
4574   m = merge->rowmap->n;
4575   for (i=0; i<m; i++) {
4576     arow = owners[rank] + i;
4577     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4578     bnzi = bi[i+1] - bi[i];
4579     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4580 
4581     /* add local non-zero vals of this proc's seqmat into ba */
4582     anzi   = ai[arow+1] - ai[arow];
4583     aj     = a->j + ai[arow];
4584     aa     = a->a + ai[arow];
4585     nextaj = 0;
4586     for (j=0; nextaj<anzi; j++) {
4587       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4588         ba_i[j] += aa[nextaj++];
4589       }
4590     }
4591 
4592     /* add received vals into ba */
4593     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4594       /* i-th row */
4595       if (i == *nextrow[k]) {
4596         anzi   = *(nextai[k]+1) - *nextai[k];
4597         aj     = buf_rj[k] + *(nextai[k]);
4598         aa     = abuf_r[k] + *(nextai[k]);
4599         nextaj = 0;
4600         for (j=0; nextaj<anzi; j++) {
4601           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4602             ba_i[j] += aa[nextaj++];
4603           }
4604         }
4605         nextrow[k]++; nextai[k]++;
4606       }
4607     }
4608     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4609   }
4610   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612 
4613   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4615   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4616   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4617   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4618   PetscFunctionReturn(0);
4619 }
4620 
4621 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4622 
4623 #undef __FUNCT__
4624 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4625 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4626 {
4627   PetscErrorCode      ierr;
4628   Mat                 B_mpi;
4629   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4630   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4631   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4632   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4633   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4634   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4635   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4636   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4637   MPI_Status          *status;
4638   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4639   PetscBT             lnkbt;
4640   Mat_Merge_SeqsToMPI *merge;
4641   PetscContainer      container;
4642 
4643   PetscFunctionBegin;
4644   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4645 
4646   /* make sure it is a PETSc comm */
4647   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4650 
4651   ierr = PetscNew(&merge);CHKERRQ(ierr);
4652   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4653 
4654   /* determine row ownership */
4655   /*---------------------------------------------------------*/
4656   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4657   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4661   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4662   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4663 
4664   m      = merge->rowmap->n;
4665   owners = merge->rowmap->range;
4666 
4667   /* determine the number of messages to send, their lengths */
4668   /*---------------------------------------------------------*/
4669   len_s = merge->len_s;
4670 
4671   len          = 0; /* length of buf_si[] */
4672   merge->nsend = 0;
4673   for (proc=0; proc<size; proc++) {
4674     len_si[proc] = 0;
4675     if (proc == rank) {
4676       len_s[proc] = 0;
4677     } else {
4678       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4679       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4680     }
4681     if (len_s[proc]) {
4682       merge->nsend++;
4683       nrows = 0;
4684       for (i=owners[proc]; i<owners[proc+1]; i++) {
4685         if (ai[i+1] > ai[i]) nrows++;
4686       }
4687       len_si[proc] = 2*(nrows+1);
4688       len         += len_si[proc];
4689     }
4690   }
4691 
4692   /* determine the number and length of messages to receive for ij-structure */
4693   /*-------------------------------------------------------------------------*/
4694   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4695   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4696 
4697   /* post the Irecv of j-structure */
4698   /*-------------------------------*/
4699   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4700   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4701 
4702   /* post the Isend of j-structure */
4703   /*--------------------------------*/
4704   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4705 
4706   for (proc=0, k=0; proc<size; proc++) {
4707     if (!len_s[proc]) continue;
4708     i    = owners[proc];
4709     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4710     k++;
4711   }
4712 
4713   /* receives and sends of j-structure are complete */
4714   /*------------------------------------------------*/
4715   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4716   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4717 
4718   /* send and recv i-structure */
4719   /*---------------------------*/
4720   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4721   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4722 
4723   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4724   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4725   for (proc=0,k=0; proc<size; proc++) {
4726     if (!len_s[proc]) continue;
4727     /* form outgoing message for i-structure:
4728          buf_si[0]:                 nrows to be sent
4729                [1:nrows]:           row index (global)
4730                [nrows+1:2*nrows+1]: i-structure index
4731     */
4732     /*-------------------------------------------*/
4733     nrows       = len_si[proc]/2 - 1;
4734     buf_si_i    = buf_si + nrows+1;
4735     buf_si[0]   = nrows;
4736     buf_si_i[0] = 0;
4737     nrows       = 0;
4738     for (i=owners[proc]; i<owners[proc+1]; i++) {
4739       anzi = ai[i+1] - ai[i];
4740       if (anzi) {
4741         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4742         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4743         nrows++;
4744       }
4745     }
4746     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4747     k++;
4748     buf_si += len_si[proc];
4749   }
4750 
4751   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4752   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4753 
4754   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4755   for (i=0; i<merge->nrecv; i++) {
4756     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4757   }
4758 
4759   ierr = PetscFree(len_si);CHKERRQ(ierr);
4760   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4761   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4762   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4763   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4764   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4765   ierr = PetscFree(status);CHKERRQ(ierr);
4766 
4767   /* compute a local seq matrix in each processor */
4768   /*----------------------------------------------*/
4769   /* allocate bi array and free space for accumulating nonzero column info */
4770   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4771   bi[0] = 0;
4772 
4773   /* create and initialize a linked list */
4774   nlnk = N+1;
4775   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4776 
4777   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4778   len  = ai[owners[rank+1]] - ai[owners[rank]];
4779   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4780 
4781   current_space = free_space;
4782 
4783   /* determine symbolic info for each local row */
4784   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4785 
4786   for (k=0; k<merge->nrecv; k++) {
4787     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4788     nrows       = *buf_ri_k[k];
4789     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4790     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4791   }
4792 
4793   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4794   len  = 0;
4795   for (i=0; i<m; i++) {
4796     bnzi = 0;
4797     /* add local non-zero cols of this proc's seqmat into lnk */
4798     arow  = owners[rank] + i;
4799     anzi  = ai[arow+1] - ai[arow];
4800     aj    = a->j + ai[arow];
4801     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4802     bnzi += nlnk;
4803     /* add received col data into lnk */
4804     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4805       if (i == *nextrow[k]) { /* i-th row */
4806         anzi  = *(nextai[k]+1) - *nextai[k];
4807         aj    = buf_rj[k] + *nextai[k];
4808         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4809         bnzi += nlnk;
4810         nextrow[k]++; nextai[k]++;
4811       }
4812     }
4813     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4814 
4815     /* if free space is not available, make more free space */
4816     if (current_space->local_remaining<bnzi) {
4817       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4818       nspacedouble++;
4819     }
4820     /* copy data into free space, then initialize lnk */
4821     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4822     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4823 
4824     current_space->array           += bnzi;
4825     current_space->local_used      += bnzi;
4826     current_space->local_remaining -= bnzi;
4827 
4828     bi[i+1] = bi[i] + bnzi;
4829   }
4830 
4831   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4832 
4833   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4834   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4835   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4836 
4837   /* create symbolic parallel matrix B_mpi */
4838   /*---------------------------------------*/
4839   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4840   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4841   if (n==PETSC_DECIDE) {
4842     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4843   } else {
4844     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4845   }
4846   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4847   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4848   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4849   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4850   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4851 
4852   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4853   B_mpi->assembled    = PETSC_FALSE;
4854   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4855   merge->bi           = bi;
4856   merge->bj           = bj;
4857   merge->buf_ri       = buf_ri;
4858   merge->buf_rj       = buf_rj;
4859   merge->coi          = NULL;
4860   merge->coj          = NULL;
4861   merge->owners_co    = NULL;
4862 
4863   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4864 
4865   /* attach the supporting struct to B_mpi for reuse */
4866   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4867   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4868   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4869   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4870   *mpimat = B_mpi;
4871 
4872   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4878 /*@C
4879       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4880                  matrices from each processor
4881 
4882     Collective on MPI_Comm
4883 
4884    Input Parameters:
4885 +    comm - the communicators the parallel matrix will live on
4886 .    seqmat - the input sequential matrices
4887 .    m - number of local rows (or PETSC_DECIDE)
4888 .    n - number of local columns (or PETSC_DECIDE)
4889 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4890 
4891    Output Parameter:
4892 .    mpimat - the parallel matrix generated
4893 
4894     Level: advanced
4895 
4896    Notes:
4897      The dimensions of the sequential matrix in each processor MUST be the same.
4898      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4899      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4900 @*/
4901 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4902 {
4903   PetscErrorCode ierr;
4904   PetscMPIInt    size;
4905 
4906   PetscFunctionBegin;
4907   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4908   if (size == 1) {
4909     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910     if (scall == MAT_INITIAL_MATRIX) {
4911       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4912     } else {
4913       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4914     }
4915     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916     PetscFunctionReturn(0);
4917   }
4918   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919   if (scall == MAT_INITIAL_MATRIX) {
4920     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4921   }
4922   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4923   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 #undef __FUNCT__
4928 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4929 /*@
4930      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4931           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4932           with MatGetSize()
4933 
4934     Not Collective
4935 
4936    Input Parameters:
4937 +    A - the matrix
4938 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 
4940    Output Parameter:
4941 .    A_loc - the local sequential matrix generated
4942 
4943     Level: developer
4944 
4945 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4946 
4947 @*/
4948 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4949 {
4950   PetscErrorCode ierr;
4951   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4952   Mat_SeqAIJ     *mat,*a,*b;
4953   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4954   MatScalar      *aa,*ba,*cam;
4955   PetscScalar    *ca;
4956   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4957   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4958   PetscBool      match;
4959 
4960   PetscFunctionBegin;
4961   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4962   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4963   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4964   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4965   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4966   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4967   aa = a->a; ba = b->a;
4968   if (scall == MAT_INITIAL_MATRIX) {
4969     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4970     ci[0] = 0;
4971     for (i=0; i<am; i++) {
4972       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4973     }
4974     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4975     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4976     k    = 0;
4977     for (i=0; i<am; i++) {
4978       ncols_o = bi[i+1] - bi[i];
4979       ncols_d = ai[i+1] - ai[i];
4980       /* off-diagonal portion of A */
4981       for (jo=0; jo<ncols_o; jo++) {
4982         col = cmap[*bj];
4983         if (col >= cstart) break;
4984         cj[k]   = col; bj++;
4985         ca[k++] = *ba++;
4986       }
4987       /* diagonal portion of A */
4988       for (j=0; j<ncols_d; j++) {
4989         cj[k]   = cstart + *aj++;
4990         ca[k++] = *aa++;
4991       }
4992       /* off-diagonal portion of A */
4993       for (j=jo; j<ncols_o; j++) {
4994         cj[k]   = cmap[*bj++];
4995         ca[k++] = *ba++;
4996       }
4997     }
4998     /* put together the new matrix */
4999     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5000     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5001     /* Since these are PETSc arrays, change flags to free them as necessary. */
5002     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5003     mat->free_a  = PETSC_TRUE;
5004     mat->free_ij = PETSC_TRUE;
5005     mat->nonew   = 0;
5006   } else if (scall == MAT_REUSE_MATRIX) {
5007     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5008     ci = mat->i; cj = mat->j; cam = mat->a;
5009     for (i=0; i<am; i++) {
5010       /* off-diagonal portion of A */
5011       ncols_o = bi[i+1] - bi[i];
5012       for (jo=0; jo<ncols_o; jo++) {
5013         col = cmap[*bj];
5014         if (col >= cstart) break;
5015         *cam++ = *ba++; bj++;
5016       }
5017       /* diagonal portion of A */
5018       ncols_d = ai[i+1] - ai[i];
5019       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5020       /* off-diagonal portion of A */
5021       for (j=jo; j<ncols_o; j++) {
5022         *cam++ = *ba++; bj++;
5023       }
5024     }
5025   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5026   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5027   PetscFunctionReturn(0);
5028 }
5029 
5030 #undef __FUNCT__
5031 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5032 /*@C
5033      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5034 
5035     Not Collective
5036 
5037    Input Parameters:
5038 +    A - the matrix
5039 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5040 -    row, col - index sets of rows and columns to extract (or NULL)
5041 
5042    Output Parameter:
5043 .    A_loc - the local sequential matrix generated
5044 
5045     Level: developer
5046 
5047 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5048 
5049 @*/
5050 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5051 {
5052   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5053   PetscErrorCode ierr;
5054   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5055   IS             isrowa,iscola;
5056   Mat            *aloc;
5057   PetscBool      match;
5058 
5059   PetscFunctionBegin;
5060   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5061   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5062   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5063   if (!row) {
5064     start = A->rmap->rstart; end = A->rmap->rend;
5065     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5066   } else {
5067     isrowa = *row;
5068   }
5069   if (!col) {
5070     start = A->cmap->rstart;
5071     cmap  = a->garray;
5072     nzA   = a->A->cmap->n;
5073     nzB   = a->B->cmap->n;
5074     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5075     ncols = 0;
5076     for (i=0; i<nzB; i++) {
5077       if (cmap[i] < start) idx[ncols++] = cmap[i];
5078       else break;
5079     }
5080     imark = i;
5081     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5082     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5083     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5084   } else {
5085     iscola = *col;
5086   }
5087   if (scall != MAT_INITIAL_MATRIX) {
5088     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5089     aloc[0] = *A_loc;
5090   }
5091   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5092   *A_loc = aloc[0];
5093   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5094   if (!row) {
5095     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5096   }
5097   if (!col) {
5098     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5099   }
5100   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5101   PetscFunctionReturn(0);
5102 }
5103 
5104 #undef __FUNCT__
5105 #define __FUNCT__ "MatGetBrowsOfAcols"
5106 /*@C
5107     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5108 
5109     Collective on Mat
5110 
5111    Input Parameters:
5112 +    A,B - the matrices in mpiaij format
5113 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5114 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5115 
5116    Output Parameter:
5117 +    rowb, colb - index sets of rows and columns of B to extract
5118 -    B_seq - the sequential matrix generated
5119 
5120     Level: developer
5121 
5122 @*/
5123 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5124 {
5125   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5126   PetscErrorCode ierr;
5127   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5128   IS             isrowb,iscolb;
5129   Mat            *bseq=NULL;
5130 
5131   PetscFunctionBegin;
5132   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5133     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5134   }
5135   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5136 
5137   if (scall == MAT_INITIAL_MATRIX) {
5138     start = A->cmap->rstart;
5139     cmap  = a->garray;
5140     nzA   = a->A->cmap->n;
5141     nzB   = a->B->cmap->n;
5142     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5143     ncols = 0;
5144     for (i=0; i<nzB; i++) {  /* row < local row index */
5145       if (cmap[i] < start) idx[ncols++] = cmap[i];
5146       else break;
5147     }
5148     imark = i;
5149     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5150     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5151     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5152     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5153   } else {
5154     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5155     isrowb  = *rowb; iscolb = *colb;
5156     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5157     bseq[0] = *B_seq;
5158   }
5159   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5160   *B_seq = bseq[0];
5161   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5162   if (!rowb) {
5163     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5164   } else {
5165     *rowb = isrowb;
5166   }
5167   if (!colb) {
5168     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5169   } else {
5170     *colb = iscolb;
5171   }
5172   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5173   PetscFunctionReturn(0);
5174 }
5175 
5176 #undef __FUNCT__
5177 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5178 /*
5179     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5180     of the OFF-DIAGONAL portion of local A
5181 
5182     Collective on Mat
5183 
5184    Input Parameters:
5185 +    A,B - the matrices in mpiaij format
5186 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5187 
5188    Output Parameter:
5189 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5190 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5191 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5192 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5193 
5194     Level: developer
5195 
5196 */
5197 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5198 {
5199   VecScatter_MPI_General *gen_to,*gen_from;
5200   PetscErrorCode         ierr;
5201   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5202   Mat_SeqAIJ             *b_oth;
5203   VecScatter             ctx =a->Mvctx;
5204   MPI_Comm               comm;
5205   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5206   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5207   PetscScalar            *rvalues,*svalues;
5208   MatScalar              *b_otha,*bufa,*bufA;
5209   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5210   MPI_Request            *rwaits = NULL,*swaits = NULL;
5211   MPI_Status             *sstatus,rstatus;
5212   PetscMPIInt            jj;
5213   PetscInt               *cols,sbs,rbs;
5214   PetscScalar            *vals;
5215 
5216   PetscFunctionBegin;
5217   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5218   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5219     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5220   }
5221   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5222   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5223 
5224   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5225   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5226   rvalues  = gen_from->values; /* holds the length of receiving row */
5227   svalues  = gen_to->values;   /* holds the length of sending row */
5228   nrecvs   = gen_from->n;
5229   nsends   = gen_to->n;
5230 
5231   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5232   srow    = gen_to->indices;    /* local row index to be sent */
5233   sstarts = gen_to->starts;
5234   sprocs  = gen_to->procs;
5235   sstatus = gen_to->sstatus;
5236   sbs     = gen_to->bs;
5237   rstarts = gen_from->starts;
5238   rprocs  = gen_from->procs;
5239   rbs     = gen_from->bs;
5240 
5241   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5242   if (scall == MAT_INITIAL_MATRIX) {
5243     /* i-array */
5244     /*---------*/
5245     /*  post receives */
5246     for (i=0; i<nrecvs; i++) {
5247       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5248       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5249       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5250     }
5251 
5252     /* pack the outgoing message */
5253     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5254 
5255     sstartsj[0] = 0;
5256     rstartsj[0] = 0;
5257     len         = 0; /* total length of j or a array to be sent */
5258     k           = 0;
5259     for (i=0; i<nsends; i++) {
5260       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5261       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5262       for (j=0; j<nrows; j++) {
5263         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5264         for (l=0; l<sbs; l++) {
5265           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5266 
5267           rowlen[j*sbs+l] = ncols;
5268 
5269           len += ncols;
5270           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5271         }
5272         k++;
5273       }
5274       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5275 
5276       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5277     }
5278     /* recvs and sends of i-array are completed */
5279     i = nrecvs;
5280     while (i--) {
5281       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5282     }
5283     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5284 
5285     /* allocate buffers for sending j and a arrays */
5286     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5287     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5288 
5289     /* create i-array of B_oth */
5290     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5291 
5292     b_othi[0] = 0;
5293     len       = 0; /* total length of j or a array to be received */
5294     k         = 0;
5295     for (i=0; i<nrecvs; i++) {
5296       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5297       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5298       for (j=0; j<nrows; j++) {
5299         b_othi[k+1] = b_othi[k] + rowlen[j];
5300         len        += rowlen[j]; k++;
5301       }
5302       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5303     }
5304 
5305     /* allocate space for j and a arrrays of B_oth */
5306     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5307     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5308 
5309     /* j-array */
5310     /*---------*/
5311     /*  post receives of j-array */
5312     for (i=0; i<nrecvs; i++) {
5313       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5314       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5315     }
5316 
5317     /* pack the outgoing message j-array */
5318     k = 0;
5319     for (i=0; i<nsends; i++) {
5320       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5321       bufJ  = bufj+sstartsj[i];
5322       for (j=0; j<nrows; j++) {
5323         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5324         for (ll=0; ll<sbs; ll++) {
5325           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5326           for (l=0; l<ncols; l++) {
5327             *bufJ++ = cols[l];
5328           }
5329           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5330         }
5331       }
5332       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5333     }
5334 
5335     /* recvs and sends of j-array are completed */
5336     i = nrecvs;
5337     while (i--) {
5338       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5339     }
5340     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5341   } else if (scall == MAT_REUSE_MATRIX) {
5342     sstartsj = *startsj_s;
5343     rstartsj = *startsj_r;
5344     bufa     = *bufa_ptr;
5345     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5346     b_otha   = b_oth->a;
5347   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5348 
5349   /* a-array */
5350   /*---------*/
5351   /*  post receives of a-array */
5352   for (i=0; i<nrecvs; i++) {
5353     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5354     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5355   }
5356 
5357   /* pack the outgoing message a-array */
5358   k = 0;
5359   for (i=0; i<nsends; i++) {
5360     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5361     bufA  = bufa+sstartsj[i];
5362     for (j=0; j<nrows; j++) {
5363       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5364       for (ll=0; ll<sbs; ll++) {
5365         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5366         for (l=0; l<ncols; l++) {
5367           *bufA++ = vals[l];
5368         }
5369         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5370       }
5371     }
5372     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5373   }
5374   /* recvs and sends of a-array are completed */
5375   i = nrecvs;
5376   while (i--) {
5377     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5378   }
5379   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5380   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5381 
5382   if (scall == MAT_INITIAL_MATRIX) {
5383     /* put together the new matrix */
5384     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5385 
5386     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5387     /* Since these are PETSc arrays, change flags to free them as necessary. */
5388     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5389     b_oth->free_a  = PETSC_TRUE;
5390     b_oth->free_ij = PETSC_TRUE;
5391     b_oth->nonew   = 0;
5392 
5393     ierr = PetscFree(bufj);CHKERRQ(ierr);
5394     if (!startsj_s || !bufa_ptr) {
5395       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5396       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5397     } else {
5398       *startsj_s = sstartsj;
5399       *startsj_r = rstartsj;
5400       *bufa_ptr  = bufa;
5401     }
5402   }
5403   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5404   PetscFunctionReturn(0);
5405 }
5406 
5407 #undef __FUNCT__
5408 #define __FUNCT__ "MatGetCommunicationStructs"
5409 /*@C
5410   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5411 
5412   Not Collective
5413 
5414   Input Parameters:
5415 . A - The matrix in mpiaij format
5416 
5417   Output Parameter:
5418 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5419 . colmap - A map from global column index to local index into lvec
5420 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5421 
5422   Level: developer
5423 
5424 @*/
5425 #if defined(PETSC_USE_CTABLE)
5426 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5427 #else
5428 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5429 #endif
5430 {
5431   Mat_MPIAIJ *a;
5432 
5433   PetscFunctionBegin;
5434   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5435   PetscValidPointer(lvec, 2);
5436   PetscValidPointer(colmap, 3);
5437   PetscValidPointer(multScatter, 4);
5438   a = (Mat_MPIAIJ*) A->data;
5439   if (lvec) *lvec = a->lvec;
5440   if (colmap) *colmap = a->colmap;
5441   if (multScatter) *multScatter = a->Mvctx;
5442   PetscFunctionReturn(0);
5443 }
5444 
5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5446 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5448 
5449 #undef __FUNCT__
5450 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5451 /*
5452     Computes (B'*A')' since computing B*A directly is untenable
5453 
5454                n                       p                          p
5455         (              )       (              )         (                  )
5456       m (      A       )  *  n (       B      )   =   m (         C        )
5457         (              )       (              )         (                  )
5458 
5459 */
5460 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5461 {
5462   PetscErrorCode ierr;
5463   Mat            At,Bt,Ct;
5464 
5465   PetscFunctionBegin;
5466   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5467   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5468   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5469   ierr = MatDestroy(&At);CHKERRQ(ierr);
5470   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5471   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5472   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5473   PetscFunctionReturn(0);
5474 }
5475 
5476 #undef __FUNCT__
5477 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5478 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5479 {
5480   PetscErrorCode ierr;
5481   PetscInt       m=A->rmap->n,n=B->cmap->n;
5482   Mat            Cmat;
5483 
5484   PetscFunctionBegin;
5485   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5486   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5487   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5488   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5489   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5490   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5491   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5492   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493 
5494   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5495 
5496   *C = Cmat;
5497   PetscFunctionReturn(0);
5498 }
5499 
5500 /* ----------------------------------------------------------------*/
5501 #undef __FUNCT__
5502 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5503 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5504 {
5505   PetscErrorCode ierr;
5506 
5507   PetscFunctionBegin;
5508   if (scall == MAT_INITIAL_MATRIX) {
5509     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5510     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5511     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5512   }
5513   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5514   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5515   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5516   PetscFunctionReturn(0);
5517 }
5518 
5519 #if defined(PETSC_HAVE_MUMPS)
5520 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5521 #endif
5522 #if defined(PETSC_HAVE_PASTIX)
5523 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5524 #endif
5525 #if defined(PETSC_HAVE_SUPERLU_DIST)
5526 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5527 #endif
5528 #if defined(PETSC_HAVE_CLIQUE)
5529 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5530 #endif
5531 
5532 /*MC
5533    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5534 
5535    Options Database Keys:
5536 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5537 
5538   Level: beginner
5539 
5540 .seealso: MatCreateAIJ()
5541 M*/
5542 
5543 #undef __FUNCT__
5544 #define __FUNCT__ "MatCreate_MPIAIJ"
5545 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5546 {
5547   Mat_MPIAIJ     *b;
5548   PetscErrorCode ierr;
5549   PetscMPIInt    size;
5550 
5551   PetscFunctionBegin;
5552   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5553 
5554   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5555   B->data       = (void*)b;
5556   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5557   B->assembled  = PETSC_FALSE;
5558   B->insertmode = NOT_SET_VALUES;
5559   b->size       = size;
5560 
5561   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5562 
5563   /* build cache for off array entries formed */
5564   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5565 
5566   b->donotstash  = PETSC_FALSE;
5567   b->colmap      = 0;
5568   b->garray      = 0;
5569   b->roworiented = PETSC_TRUE;
5570 
5571   /* stuff used for matrix vector multiply */
5572   b->lvec  = NULL;
5573   b->Mvctx = NULL;
5574 
5575   /* stuff for MatGetRow() */
5576   b->rowindices   = 0;
5577   b->rowvalues    = 0;
5578   b->getrowactive = PETSC_FALSE;
5579 
5580   /* flexible pointer used in CUSP/CUSPARSE classes */
5581   b->spptr = NULL;
5582 
5583 #if defined(PETSC_HAVE_MUMPS)
5584   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5585 #endif
5586 #if defined(PETSC_HAVE_PASTIX)
5587   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5588 #endif
5589 #if defined(PETSC_HAVE_SUPERLU_DIST)
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5591 #endif
5592 #if defined(PETSC_HAVE_CLIQUE)
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5594 #endif
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5609   PetscFunctionReturn(0);
5610 }
5611 
5612 #undef __FUNCT__
5613 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5614 /*@
5615      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5616          and "off-diagonal" part of the matrix in CSR format.
5617 
5618    Collective on MPI_Comm
5619 
5620    Input Parameters:
5621 +  comm - MPI communicator
5622 .  m - number of local rows (Cannot be PETSC_DECIDE)
5623 .  n - This value should be the same as the local size used in creating the
5624        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5625        calculated if N is given) For square matrices n is almost always m.
5626 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5627 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5628 .   i - row indices for "diagonal" portion of matrix
5629 .   j - column indices
5630 .   a - matrix values
5631 .   oi - row indices for "off-diagonal" portion of matrix
5632 .   oj - column indices
5633 -   oa - matrix values
5634 
5635    Output Parameter:
5636 .   mat - the matrix
5637 
5638    Level: advanced
5639 
5640    Notes:
5641        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5642        must free the arrays once the matrix has been destroyed and not before.
5643 
5644        The i and j indices are 0 based
5645 
5646        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5647 
5648        This sets local rows and cannot be used to set off-processor values.
5649 
5650        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5651        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5652        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5653        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5654        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5655        communication if it is known that only local entries will be set.
5656 
5657 .keywords: matrix, aij, compressed row, sparse, parallel
5658 
5659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5660           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5661 @*/
5662 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5663 {
5664   PetscErrorCode ierr;
5665   Mat_MPIAIJ     *maij;
5666 
5667   PetscFunctionBegin;
5668   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5669   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5670   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5671   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5672   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5673   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5674   maij = (Mat_MPIAIJ*) (*mat)->data;
5675 
5676   (*mat)->preallocated = PETSC_TRUE;
5677 
5678   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5679   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5680 
5681   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5682   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5683 
5684   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5685   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688 
5689   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5692   PetscFunctionReturn(0);
5693 }
5694 
5695 /*
5696     Special version for direct calls from Fortran
5697 */
5698 #include <petsc-private/fortranimpl.h>
5699 
5700 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5701 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5702 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5703 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5704 #endif
5705 
5706 /* Change these macros so can be used in void function */
5707 #undef CHKERRQ
5708 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5709 #undef SETERRQ2
5710 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5711 #undef SETERRQ3
5712 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ
5714 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5715 
5716 #undef __FUNCT__
5717 #define __FUNCT__ "matsetvaluesmpiaij_"
5718 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5719 {
5720   Mat            mat  = *mmat;
5721   PetscInt       m    = *mm, n = *mn;
5722   InsertMode     addv = *maddv;
5723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5724   PetscScalar    value;
5725   PetscErrorCode ierr;
5726 
5727   MatCheckPreallocated(mat,1);
5728   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5729 
5730 #if defined(PETSC_USE_DEBUG)
5731   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5732 #endif
5733   {
5734     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5735     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5736     PetscBool roworiented = aij->roworiented;
5737 
5738     /* Some Variables required in the macro */
5739     Mat        A                 = aij->A;
5740     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5741     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5742     MatScalar  *aa               = a->a;
5743     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5744     Mat        B                 = aij->B;
5745     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5746     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5747     MatScalar  *ba               = b->a;
5748 
5749     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5750     PetscInt  nonew = a->nonew;
5751     MatScalar *ap1,*ap2;
5752 
5753     PetscFunctionBegin;
5754     for (i=0; i<m; i++) {
5755       if (im[i] < 0) continue;
5756 #if defined(PETSC_USE_DEBUG)
5757       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5758 #endif
5759       if (im[i] >= rstart && im[i] < rend) {
5760         row      = im[i] - rstart;
5761         lastcol1 = -1;
5762         rp1      = aj + ai[row];
5763         ap1      = aa + ai[row];
5764         rmax1    = aimax[row];
5765         nrow1    = ailen[row];
5766         low1     = 0;
5767         high1    = nrow1;
5768         lastcol2 = -1;
5769         rp2      = bj + bi[row];
5770         ap2      = ba + bi[row];
5771         rmax2    = bimax[row];
5772         nrow2    = bilen[row];
5773         low2     = 0;
5774         high2    = nrow2;
5775 
5776         for (j=0; j<n; j++) {
5777           if (roworiented) value = v[i*n+j];
5778           else value = v[i+j*m];
5779           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5780           if (in[j] >= cstart && in[j] < cend) {
5781             col = in[j] - cstart;
5782             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5783           } else if (in[j] < 0) continue;
5784 #if defined(PETSC_USE_DEBUG)
5785           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5786 #endif
5787           else {
5788             if (mat->was_assembled) {
5789               if (!aij->colmap) {
5790                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5791               }
5792 #if defined(PETSC_USE_CTABLE)
5793               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5794               col--;
5795 #else
5796               col = aij->colmap[in[j]] - 1;
5797 #endif
5798               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5799                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5800                 col  =  in[j];
5801                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5802                 B     = aij->B;
5803                 b     = (Mat_SeqAIJ*)B->data;
5804                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5805                 rp2   = bj + bi[row];
5806                 ap2   = ba + bi[row];
5807                 rmax2 = bimax[row];
5808                 nrow2 = bilen[row];
5809                 low2  = 0;
5810                 high2 = nrow2;
5811                 bm    = aij->B->rmap->n;
5812                 ba    = b->a;
5813               }
5814             } else col = in[j];
5815             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5816           }
5817         }
5818       } else if (!aij->donotstash) {
5819         if (roworiented) {
5820           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5821         } else {
5822           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5823         }
5824       }
5825     }
5826   }
5827   PetscFunctionReturnVoid();
5828 }
5829 
5830